sqlglot.parser
from __future__ import annotations

import logging
import typing as t
import itertools
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        TokenType.CURRENT_TIME: exp.CurrentTime,
TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 270 TokenType.CURRENT_USER: exp.CurrentUser, 271 } 272 273 STRUCT_TYPE_TOKENS = { 274 TokenType.NESTED, 275 TokenType.OBJECT, 276 TokenType.STRUCT, 277 TokenType.UNION, 278 } 279 280 NESTED_TYPE_TOKENS = { 281 TokenType.ARRAY, 282 TokenType.LIST, 283 TokenType.LOWCARDINALITY, 284 TokenType.MAP, 285 TokenType.NULLABLE, 286 TokenType.RANGE, 287 *STRUCT_TYPE_TOKENS, 288 } 289 290 ENUM_TYPE_TOKENS = { 291 TokenType.DYNAMIC, 292 TokenType.ENUM, 293 TokenType.ENUM8, 294 TokenType.ENUM16, 295 } 296 297 AGGREGATE_TYPE_TOKENS = { 298 TokenType.AGGREGATEFUNCTION, 299 TokenType.SIMPLEAGGREGATEFUNCTION, 300 } 301 302 TYPE_TOKENS = { 303 TokenType.BIT, 304 TokenType.BOOLEAN, 305 TokenType.TINYINT, 306 TokenType.UTINYINT, 307 TokenType.SMALLINT, 308 TokenType.USMALLINT, 309 TokenType.INT, 310 TokenType.UINT, 311 TokenType.BIGINT, 312 TokenType.UBIGINT, 313 TokenType.INT128, 314 TokenType.UINT128, 315 TokenType.INT256, 316 TokenType.UINT256, 317 TokenType.MEDIUMINT, 318 TokenType.UMEDIUMINT, 319 TokenType.FIXEDSTRING, 320 TokenType.FLOAT, 321 TokenType.DOUBLE, 322 TokenType.UDOUBLE, 323 TokenType.CHAR, 324 TokenType.NCHAR, 325 TokenType.VARCHAR, 326 TokenType.NVARCHAR, 327 TokenType.BPCHAR, 328 TokenType.TEXT, 329 TokenType.MEDIUMTEXT, 330 TokenType.LONGTEXT, 331 TokenType.BLOB, 332 TokenType.MEDIUMBLOB, 333 TokenType.LONGBLOB, 334 TokenType.BINARY, 335 TokenType.VARBINARY, 336 TokenType.JSON, 337 TokenType.JSONB, 338 TokenType.INTERVAL, 339 TokenType.TINYBLOB, 340 TokenType.TINYTEXT, 341 TokenType.TIME, 342 TokenType.TIMETZ, 343 TokenType.TIMESTAMP, 344 TokenType.TIMESTAMP_S, 345 TokenType.TIMESTAMP_MS, 346 TokenType.TIMESTAMP_NS, 347 TokenType.TIMESTAMPTZ, 348 TokenType.TIMESTAMPLTZ, 349 TokenType.TIMESTAMPNTZ, 350 TokenType.DATETIME, 351 TokenType.DATETIME2, 352 TokenType.DATETIME64, 353 TokenType.SMALLDATETIME, 354 TokenType.DATE, 355 TokenType.DATE32, 356 TokenType.INT4RANGE, 357 TokenType.INT4MULTIRANGE, 358 TokenType.INT8RANGE, 359 TokenType.INT8MULTIRANGE, 360 TokenType.NUMRANGE, 361 TokenType.NUMMULTIRANGE, 362 TokenType.TSRANGE, 363 TokenType.TSMULTIRANGE, 364 TokenType.TSTZRANGE, 365 TokenType.TSTZMULTIRANGE, 366 TokenType.DATERANGE, 367 TokenType.DATEMULTIRANGE, 368 TokenType.DECIMAL, 369 TokenType.DECIMAL32, 370 TokenType.DECIMAL64, 371 TokenType.DECIMAL128, 372 TokenType.DECIMAL256, 373 TokenType.UDECIMAL, 374 TokenType.BIGDECIMAL, 375 TokenType.UUID, 376 TokenType.GEOGRAPHY, 377 TokenType.GEOMETRY, 378 TokenType.POINT, 379 TokenType.RING, 380 TokenType.LINESTRING, 381 TokenType.MULTILINESTRING, 382 TokenType.POLYGON, 383 TokenType.MULTIPOLYGON, 384 TokenType.HLLSKETCH, 385 TokenType.HSTORE, 386 TokenType.PSEUDO_TYPE, 387 TokenType.SUPER, 388 TokenType.SERIAL, 389 TokenType.SMALLSERIAL, 390 TokenType.BIGSERIAL, 391 TokenType.XML, 392 TokenType.YEAR, 393 TokenType.USERDEFINED, 394 TokenType.MONEY, 395 TokenType.SMALLMONEY, 396 TokenType.ROWVERSION, 397 TokenType.IMAGE, 398 TokenType.VARIANT, 399 TokenType.VECTOR, 400 TokenType.OBJECT, 401 TokenType.OBJECT_IDENTIFIER, 402 TokenType.INET, 403 TokenType.IPADDRESS, 404 TokenType.IPPREFIX, 405 TokenType.IPV4, 406 TokenType.IPV6, 407 TokenType.UNKNOWN, 408 TokenType.NULL, 409 TokenType.NAME, 410 TokenType.TDIGEST, 411 TokenType.DYNAMIC, 412 *ENUM_TYPE_TOKENS, 413 *NESTED_TYPE_TOKENS, 414 *AGGREGATE_TYPE_TOKENS, 415 } 416 417 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 418 TokenType.BIGINT: TokenType.UBIGINT, 419 TokenType.INT: TokenType.UINT, 420 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 421 
TokenType.SMALLINT: TokenType.USMALLINT, 422 TokenType.TINYINT: TokenType.UTINYINT, 423 TokenType.DECIMAL: TokenType.UDECIMAL, 424 TokenType.DOUBLE: TokenType.UDOUBLE, 425 } 426 427 SUBQUERY_PREDICATES = { 428 TokenType.ANY: exp.Any, 429 TokenType.ALL: exp.All, 430 TokenType.EXISTS: exp.Exists, 431 TokenType.SOME: exp.Any, 432 } 433 434 RESERVED_TOKENS = { 435 *Tokenizer.SINGLE_TOKENS.values(), 436 TokenType.SELECT, 437 } - {TokenType.IDENTIFIER} 438 439 DB_CREATABLES = { 440 TokenType.DATABASE, 441 TokenType.DICTIONARY, 442 TokenType.FILE_FORMAT, 443 TokenType.MODEL, 444 TokenType.NAMESPACE, 445 TokenType.SCHEMA, 446 TokenType.SEQUENCE, 447 TokenType.SINK, 448 TokenType.SOURCE, 449 TokenType.STAGE, 450 TokenType.STORAGE_INTEGRATION, 451 TokenType.STREAMLIT, 452 TokenType.TABLE, 453 TokenType.TAG, 454 TokenType.VIEW, 455 TokenType.WAREHOUSE, 456 } 457 458 CREATABLES = { 459 TokenType.COLUMN, 460 TokenType.CONSTRAINT, 461 TokenType.FOREIGN_KEY, 462 TokenType.FUNCTION, 463 TokenType.INDEX, 464 TokenType.PROCEDURE, 465 *DB_CREATABLES, 466 } 467 468 ALTERABLES = { 469 TokenType.INDEX, 470 TokenType.TABLE, 471 TokenType.VIEW, 472 } 473 474 # Tokens that can represent identifiers 475 ID_VAR_TOKENS = { 476 TokenType.ALL, 477 TokenType.ATTACH, 478 TokenType.VAR, 479 TokenType.ANTI, 480 TokenType.APPLY, 481 TokenType.ASC, 482 TokenType.ASOF, 483 TokenType.AUTO_INCREMENT, 484 TokenType.BEGIN, 485 TokenType.BPCHAR, 486 TokenType.CACHE, 487 TokenType.CASE, 488 TokenType.COLLATE, 489 TokenType.COMMAND, 490 TokenType.COMMENT, 491 TokenType.COMMIT, 492 TokenType.CONSTRAINT, 493 TokenType.COPY, 494 TokenType.CUBE, 495 TokenType.CURRENT_SCHEMA, 496 TokenType.DEFAULT, 497 TokenType.DELETE, 498 TokenType.DESC, 499 TokenType.DESCRIBE, 500 TokenType.DETACH, 501 TokenType.DICTIONARY, 502 TokenType.DIV, 503 TokenType.END, 504 TokenType.EXECUTE, 505 TokenType.EXPORT, 506 TokenType.ESCAPE, 507 TokenType.FALSE, 508 TokenType.FIRST, 509 TokenType.FILTER, 510 TokenType.FINAL, 511 TokenType.FORMAT, 512 TokenType.FULL, 513 TokenType.IDENTIFIER, 514 TokenType.IS, 515 TokenType.ISNULL, 516 TokenType.INTERVAL, 517 TokenType.KEEP, 518 TokenType.KILL, 519 TokenType.LEFT, 520 TokenType.LIMIT, 521 TokenType.LOAD, 522 TokenType.MERGE, 523 TokenType.NATURAL, 524 TokenType.NEXT, 525 TokenType.OFFSET, 526 TokenType.OPERATOR, 527 TokenType.ORDINALITY, 528 TokenType.OVERLAPS, 529 TokenType.OVERWRITE, 530 TokenType.PARTITION, 531 TokenType.PERCENT, 532 TokenType.PIVOT, 533 TokenType.PRAGMA, 534 TokenType.PUT, 535 TokenType.RANGE, 536 TokenType.RECURSIVE, 537 TokenType.REFERENCES, 538 TokenType.REFRESH, 539 TokenType.RENAME, 540 TokenType.REPLACE, 541 TokenType.RIGHT, 542 TokenType.ROLLUP, 543 TokenType.ROW, 544 TokenType.ROWS, 545 TokenType.SEMI, 546 TokenType.SET, 547 TokenType.SETTINGS, 548 TokenType.SHOW, 549 TokenType.TEMPORARY, 550 TokenType.TOP, 551 TokenType.TRUE, 552 TokenType.TRUNCATE, 553 TokenType.UNIQUE, 554 TokenType.UNNEST, 555 TokenType.UNPIVOT, 556 TokenType.UPDATE, 557 TokenType.USE, 558 TokenType.VOLATILE, 559 TokenType.WINDOW, 560 *CREATABLES, 561 *SUBQUERY_PREDICATES, 562 *TYPE_TOKENS, 563 *NO_PAREN_FUNCTIONS, 564 } 565 ID_VAR_TOKENS.remove(TokenType.UNION) 566 567 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 568 TokenType.ANTI, 569 TokenType.APPLY, 570 TokenType.ASOF, 571 TokenType.FULL, 572 TokenType.LEFT, 573 TokenType.LOCK, 574 TokenType.NATURAL, 575 TokenType.RIGHT, 576 TokenType.SEMI, 577 TokenType.WINDOW, 578 } 579 580 ALIAS_TOKENS = ID_VAR_TOKENS 581 582 ARRAY_CONSTRUCTORS = { 583 "ARRAY": exp.Array, 584 
"LIST": exp.List, 585 } 586 587 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 588 589 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 590 591 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 592 593 FUNC_TOKENS = { 594 TokenType.COLLATE, 595 TokenType.COMMAND, 596 TokenType.CURRENT_DATE, 597 TokenType.CURRENT_DATETIME, 598 TokenType.CURRENT_SCHEMA, 599 TokenType.CURRENT_TIMESTAMP, 600 TokenType.CURRENT_TIME, 601 TokenType.CURRENT_USER, 602 TokenType.FILTER, 603 TokenType.FIRST, 604 TokenType.FORMAT, 605 TokenType.GLOB, 606 TokenType.IDENTIFIER, 607 TokenType.INDEX, 608 TokenType.ISNULL, 609 TokenType.ILIKE, 610 TokenType.INSERT, 611 TokenType.LIKE, 612 TokenType.MERGE, 613 TokenType.NEXT, 614 TokenType.OFFSET, 615 TokenType.PRIMARY_KEY, 616 TokenType.RANGE, 617 TokenType.REPLACE, 618 TokenType.RLIKE, 619 TokenType.ROW, 620 TokenType.UNNEST, 621 TokenType.VAR, 622 TokenType.LEFT, 623 TokenType.RIGHT, 624 TokenType.SEQUENCE, 625 TokenType.DATE, 626 TokenType.DATETIME, 627 TokenType.TABLE, 628 TokenType.TIMESTAMP, 629 TokenType.TIMESTAMPTZ, 630 TokenType.TRUNCATE, 631 TokenType.WINDOW, 632 TokenType.XOR, 633 *TYPE_TOKENS, 634 *SUBQUERY_PREDICATES, 635 } 636 637 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 638 TokenType.AND: exp.And, 639 } 640 641 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 642 TokenType.COLON_EQ: exp.PropertyEQ, 643 } 644 645 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 646 TokenType.OR: exp.Or, 647 } 648 649 EQUALITY = { 650 TokenType.EQ: exp.EQ, 651 TokenType.NEQ: exp.NEQ, 652 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 653 } 654 655 COMPARISON = { 656 TokenType.GT: exp.GT, 657 TokenType.GTE: exp.GTE, 658 TokenType.LT: exp.LT, 659 TokenType.LTE: exp.LTE, 660 } 661 662 BITWISE = { 663 TokenType.AMP: exp.BitwiseAnd, 664 TokenType.CARET: exp.BitwiseXor, 665 TokenType.PIPE: exp.BitwiseOr, 666 } 667 668 TERM = { 669 TokenType.DASH: exp.Sub, 670 TokenType.PLUS: exp.Add, 671 TokenType.MOD: exp.Mod, 672 TokenType.COLLATE: exp.Collate, 673 } 674 675 FACTOR = { 676 TokenType.DIV: exp.IntDiv, 677 TokenType.LR_ARROW: exp.Distance, 678 TokenType.SLASH: exp.Div, 679 TokenType.STAR: exp.Mul, 680 } 681 682 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 683 684 TIMES = { 685 TokenType.TIME, 686 TokenType.TIMETZ, 687 } 688 689 TIMESTAMPS = { 690 TokenType.TIMESTAMP, 691 TokenType.TIMESTAMPNTZ, 692 TokenType.TIMESTAMPTZ, 693 TokenType.TIMESTAMPLTZ, 694 *TIMES, 695 } 696 697 SET_OPERATIONS = { 698 TokenType.UNION, 699 TokenType.INTERSECT, 700 TokenType.EXCEPT, 701 } 702 703 JOIN_METHODS = { 704 TokenType.ASOF, 705 TokenType.NATURAL, 706 TokenType.POSITIONAL, 707 } 708 709 JOIN_SIDES = { 710 TokenType.LEFT, 711 TokenType.RIGHT, 712 TokenType.FULL, 713 } 714 715 JOIN_KINDS = { 716 TokenType.ANTI, 717 TokenType.CROSS, 718 TokenType.INNER, 719 TokenType.OUTER, 720 TokenType.SEMI, 721 TokenType.STRAIGHT_JOIN, 722 } 723 724 JOIN_HINTS: t.Set[str] = set() 725 726 LAMBDAS = { 727 TokenType.ARROW: lambda self, expressions: self.expression( 728 exp.Lambda, 729 this=self._replace_lambda( 730 self._parse_assignment(), 731 expressions, 732 ), 733 expressions=expressions, 734 ), 735 TokenType.FARROW: lambda self, expressions: self.expression( 736 exp.Kwarg, 737 this=exp.var(expressions[0].name), 738 expression=self._parse_assignment(), 739 ), 740 } 741 742 COLUMN_OPERATORS = { 743 TokenType.DOT: None, 744 TokenType.DOTCOLON: lambda self, this, to: self.expression( 745 exp.JSONCast, 746 this=this, 747 to=to, 748 ), 749 TokenType.DCOLON: 
lambda self, this, to: self.expression( 750 exp.Cast if self.STRICT_CAST else exp.TryCast, 751 this=this, 752 to=to, 753 ), 754 TokenType.ARROW: lambda self, this, path: self.expression( 755 exp.JSONExtract, 756 this=this, 757 expression=self.dialect.to_json_path(path), 758 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 759 ), 760 TokenType.DARROW: lambda self, this, path: self.expression( 761 exp.JSONExtractScalar, 762 this=this, 763 expression=self.dialect.to_json_path(path), 764 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 765 ), 766 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 767 exp.JSONBExtract, 768 this=this, 769 expression=path, 770 ), 771 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 772 exp.JSONBExtractScalar, 773 this=this, 774 expression=path, 775 ), 776 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 777 exp.JSONBContains, 778 this=this, 779 expression=key, 780 ), 781 } 782 783 EXPRESSION_PARSERS = { 784 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 785 exp.Column: lambda self: self._parse_column(), 786 exp.Condition: lambda self: self._parse_assignment(), 787 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 788 exp.Expression: lambda self: self._parse_expression(), 789 exp.From: lambda self: self._parse_from(joins=True), 790 exp.Group: lambda self: self._parse_group(), 791 exp.Having: lambda self: self._parse_having(), 792 exp.Hint: lambda self: self._parse_hint_body(), 793 exp.Identifier: lambda self: self._parse_id_var(), 794 exp.Join: lambda self: self._parse_join(), 795 exp.Lambda: lambda self: self._parse_lambda(), 796 exp.Lateral: lambda self: self._parse_lateral(), 797 exp.Limit: lambda self: self._parse_limit(), 798 exp.Offset: lambda self: self._parse_offset(), 799 exp.Order: lambda self: self._parse_order(), 800 exp.Ordered: lambda self: self._parse_ordered(), 801 exp.Properties: lambda self: self._parse_properties(), 802 exp.Qualify: lambda self: self._parse_qualify(), 803 exp.Returning: lambda self: self._parse_returning(), 804 exp.Select: lambda self: self._parse_select(), 805 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 806 exp.Table: lambda self: self._parse_table_parts(), 807 exp.TableAlias: lambda self: self._parse_table_alias(), 808 exp.Tuple: lambda self: self._parse_value(values=False), 809 exp.Whens: lambda self: self._parse_when_matched(), 810 exp.Where: lambda self: self._parse_where(), 811 exp.Window: lambda self: self._parse_named_window(), 812 exp.With: lambda self: self._parse_with(), 813 "JOIN_TYPE": lambda self: self._parse_join_parts(), 814 } 815 816 STATEMENT_PARSERS = { 817 TokenType.ALTER: lambda self: self._parse_alter(), 818 TokenType.ANALYZE: lambda self: self._parse_analyze(), 819 TokenType.BEGIN: lambda self: self._parse_transaction(), 820 TokenType.CACHE: lambda self: self._parse_cache(), 821 TokenType.COMMENT: lambda self: self._parse_comment(), 822 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 823 TokenType.COPY: lambda self: self._parse_copy(), 824 TokenType.CREATE: lambda self: self._parse_create(), 825 TokenType.DELETE: lambda self: self._parse_delete(), 826 TokenType.DESC: lambda self: self._parse_describe(), 827 TokenType.DESCRIBE: lambda self: self._parse_describe(), 828 TokenType.DROP: lambda self: self._parse_drop(), 829 TokenType.GRANT: lambda self: self._parse_grant(), 830 TokenType.INSERT: lambda self: self._parse_insert(), 831 TokenType.KILL: 
lambda self: self._parse_kill(), 832 TokenType.LOAD: lambda self: self._parse_load(), 833 TokenType.MERGE: lambda self: self._parse_merge(), 834 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 835 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 836 TokenType.REFRESH: lambda self: self._parse_refresh(), 837 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 838 TokenType.SET: lambda self: self._parse_set(), 839 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 840 TokenType.UNCACHE: lambda self: self._parse_uncache(), 841 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 842 TokenType.UPDATE: lambda self: self._parse_update(), 843 TokenType.USE: lambda self: self._parse_use(), 844 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 845 } 846 847 UNARY_PARSERS = { 848 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 849 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 850 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 851 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 852 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 853 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 854 } 855 856 STRING_PARSERS = { 857 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 858 exp.RawString, this=token.text 859 ), 860 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 861 exp.National, this=token.text 862 ), 863 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 864 TokenType.STRING: lambda self, token: self.expression( 865 exp.Literal, this=token.text, is_string=True 866 ), 867 TokenType.UNICODE_STRING: lambda self, token: self.expression( 868 exp.UnicodeString, 869 this=token.text, 870 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 871 ), 872 } 873 874 NUMERIC_PARSERS = { 875 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 876 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 877 TokenType.HEX_STRING: lambda self, token: self.expression( 878 exp.HexString, 879 this=token.text, 880 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 881 ), 882 TokenType.NUMBER: lambda self, token: self.expression( 883 exp.Literal, this=token.text, is_string=False 884 ), 885 } 886 887 PRIMARY_PARSERS = { 888 **STRING_PARSERS, 889 **NUMERIC_PARSERS, 890 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 891 TokenType.NULL: lambda self, _: self.expression(exp.Null), 892 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 893 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 894 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 895 TokenType.STAR: lambda self, _: self._parse_star_ops(), 896 } 897 898 PLACEHOLDER_PARSERS = { 899 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 900 TokenType.PARAMETER: lambda self: self._parse_parameter(), 901 TokenType.COLON: lambda self: ( 902 self.expression(exp.Placeholder, this=self._prev.text) 903 if self._match_set(self.ID_VAR_TOKENS) 904 else None 905 ), 906 } 907 908 RANGE_PARSERS = { 909 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 910 
TokenType.BETWEEN: lambda self, this: self._parse_between(this), 911 TokenType.GLOB: binary_range_parser(exp.Glob), 912 TokenType.ILIKE: binary_range_parser(exp.ILike), 913 TokenType.IN: lambda self, this: self._parse_in(this), 914 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 915 TokenType.IS: lambda self, this: self._parse_is(this), 916 TokenType.LIKE: binary_range_parser(exp.Like), 917 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 918 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 919 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 920 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 921 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 922 } 923 924 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 925 "ALLOWED_VALUES": lambda self: self.expression( 926 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 927 ), 928 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 929 "AUTO": lambda self: self._parse_auto_property(), 930 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 931 "BACKUP": lambda self: self.expression( 932 exp.BackupProperty, this=self._parse_var(any_token=True) 933 ), 934 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 935 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 936 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 937 "CHECKSUM": lambda self: self._parse_checksum(), 938 "CLUSTER BY": lambda self: self._parse_cluster(), 939 "CLUSTERED": lambda self: self._parse_clustered_by(), 940 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 941 exp.CollateProperty, **kwargs 942 ), 943 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 944 "CONTAINS": lambda self: self._parse_contains_property(), 945 "COPY": lambda self: self._parse_copy_property(), 946 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 947 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 948 "DEFINER": lambda self: self._parse_definer(), 949 "DETERMINISTIC": lambda self: self.expression( 950 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 951 ), 952 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 953 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 954 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 955 "DISTKEY": lambda self: self._parse_distkey(), 956 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 957 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 958 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 959 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 960 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 961 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 962 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 963 "FREESPACE": lambda self: self._parse_freespace(), 964 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 965 "HEAP": lambda self: self.expression(exp.HeapProperty), 966 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 967 "IMMUTABLE": lambda self: self.expression( 968 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 969 ), 970 "INHERITS": lambda self: self.expression( 971 
exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 972 ), 973 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 974 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 975 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 976 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 977 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 978 "LIKE": lambda self: self._parse_create_like(), 979 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 980 "LOCK": lambda self: self._parse_locking(), 981 "LOCKING": lambda self: self._parse_locking(), 982 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 983 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 984 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 985 "MODIFIES": lambda self: self._parse_modifies_property(), 986 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 987 "NO": lambda self: self._parse_no_property(), 988 "ON": lambda self: self._parse_on_property(), 989 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 990 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 991 "PARTITION": lambda self: self._parse_partitioned_of(), 992 "PARTITION BY": lambda self: self._parse_partitioned_by(), 993 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 994 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 995 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 996 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 997 "READS": lambda self: self._parse_reads_property(), 998 "REMOTE": lambda self: self._parse_remote_with_connection(), 999 "RETURNS": lambda self: self._parse_returns(), 1000 "STRICT": lambda self: self.expression(exp.StrictProperty), 1001 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1002 "ROW": lambda self: self._parse_row(), 1003 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1004 "SAMPLE": lambda self: self.expression( 1005 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1006 ), 1007 "SECURE": lambda self: self.expression(exp.SecureProperty), 1008 "SECURITY": lambda self: self._parse_security(), 1009 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1010 "SETTINGS": lambda self: self._parse_settings_property(), 1011 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1012 "SORTKEY": lambda self: self._parse_sortkey(), 1013 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1014 "STABLE": lambda self: self.expression( 1015 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1016 ), 1017 "STORED": lambda self: self._parse_stored(), 1018 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1019 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1020 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1021 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1022 "TO": lambda self: self._parse_to_table(), 1023 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1024 "TRANSFORM": lambda self: self.expression( 1025 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1026 ), 1027 "TTL": lambda self: self._parse_ttl(), 1028 "USING": lambda self: 
self._parse_property_assignment(exp.FileFormatProperty), 1029 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1030 "VOLATILE": lambda self: self._parse_volatile_property(), 1031 "WITH": lambda self: self._parse_with_property(), 1032 } 1033 1034 CONSTRAINT_PARSERS = { 1035 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1036 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1037 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1038 "CHARACTER SET": lambda self: self.expression( 1039 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1040 ), 1041 "CHECK": lambda self: self.expression( 1042 exp.CheckColumnConstraint, 1043 this=self._parse_wrapped(self._parse_assignment), 1044 enforced=self._match_text_seq("ENFORCED"), 1045 ), 1046 "COLLATE": lambda self: self.expression( 1047 exp.CollateColumnConstraint, 1048 this=self._parse_identifier() or self._parse_column(), 1049 ), 1050 "COMMENT": lambda self: self.expression( 1051 exp.CommentColumnConstraint, this=self._parse_string() 1052 ), 1053 "COMPRESS": lambda self: self._parse_compress(), 1054 "CLUSTERED": lambda self: self.expression( 1055 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1056 ), 1057 "NONCLUSTERED": lambda self: self.expression( 1058 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1059 ), 1060 "DEFAULT": lambda self: self.expression( 1061 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1062 ), 1063 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1064 "EPHEMERAL": lambda self: self.expression( 1065 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1066 ), 1067 "EXCLUDE": lambda self: self.expression( 1068 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1069 ), 1070 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1071 "FORMAT": lambda self: self.expression( 1072 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1073 ), 1074 "GENERATED": lambda self: self._parse_generated_as_identity(), 1075 "IDENTITY": lambda self: self._parse_auto_increment(), 1076 "INLINE": lambda self: self._parse_inline(), 1077 "LIKE": lambda self: self._parse_create_like(), 1078 "NOT": lambda self: self._parse_not_constraint(), 1079 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1080 "ON": lambda self: ( 1081 self._match(TokenType.UPDATE) 1082 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1083 ) 1084 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1085 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1086 "PERIOD": lambda self: self._parse_period_for_system_time(), 1087 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1088 "REFERENCES": lambda self: self._parse_references(match=False), 1089 "TITLE": lambda self: self.expression( 1090 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1091 ), 1092 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1093 "UNIQUE": lambda self: self._parse_unique(), 1094 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1095 "WATERMARK": lambda self: self.expression( 1096 exp.WatermarkColumnConstraint, 1097 this=self._match(TokenType.FOR) and self._parse_column(), 1098 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1099 ), 1100 "WITH": lambda 
self: self.expression( 1101 exp.Properties, expressions=self._parse_wrapped_properties() 1102 ), 1103 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1104 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1105 } 1106 1107 def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression: 1108 klass = ( 1109 exp.PartitionedByBucket 1110 if self._prev.text.upper() == "BUCKET" 1111 else exp.PartitionByTruncate 1112 ) 1113 1114 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1115 this, expression = seq_get(args, 0), seq_get(args, 1) 1116 1117 if isinstance(this, exp.Literal): 1118 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1119 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1120 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1121 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1122 # 1123 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1124 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1125 this, expression = expression, this 1126 1127 return self.expression(klass, this=this, expression=expression) 1128 1129 ALTER_PARSERS = { 1130 "ADD": lambda self: self._parse_alter_table_add(), 1131 "AS": lambda self: self._parse_select(), 1132 "ALTER": lambda self: self._parse_alter_table_alter(), 1133 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1134 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1135 "DROP": lambda self: self._parse_alter_table_drop(), 1136 "RENAME": lambda self: self._parse_alter_table_rename(), 1137 "SET": lambda self: self._parse_alter_table_set(), 1138 "SWAP": lambda self: self.expression( 1139 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1140 ), 1141 } 1142 1143 ALTER_ALTER_PARSERS = { 1144 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1145 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1146 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1147 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1148 } 1149 1150 SCHEMA_UNNAMED_CONSTRAINTS = { 1151 "CHECK", 1152 "EXCLUDE", 1153 "FOREIGN KEY", 1154 "LIKE", 1155 "PERIOD", 1156 "PRIMARY KEY", 1157 "UNIQUE", 1158 "WATERMARK", 1159 "BUCKET", 1160 "TRUNCATE", 1161 } 1162 1163 NO_PAREN_FUNCTION_PARSERS = { 1164 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1165 "CASE": lambda self: self._parse_case(), 1166 "CONNECT_BY_ROOT": lambda self: self.expression( 1167 exp.ConnectByRoot, this=self._parse_column() 1168 ), 1169 "IF": lambda self: self._parse_if(), 1170 } 1171 1172 INVALID_FUNC_NAME_TOKENS = { 1173 TokenType.IDENTIFIER, 1174 TokenType.STRING, 1175 } 1176 1177 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1178 1179 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1180 1181 FUNCTION_PARSERS = { 1182 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1183 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1184 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1185 "DECODE": lambda self: self._parse_decode(), 1186 "EXTRACT": lambda self: self._parse_extract(), 1187 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 
1188 "GAP_FILL": lambda self: self._parse_gap_fill(), 1189 "JSON_OBJECT": lambda self: self._parse_json_object(), 1190 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1191 "JSON_TABLE": lambda self: self._parse_json_table(), 1192 "MATCH": lambda self: self._parse_match_against(), 1193 "NORMALIZE": lambda self: self._parse_normalize(), 1194 "OPENJSON": lambda self: self._parse_open_json(), 1195 "OVERLAY": lambda self: self._parse_overlay(), 1196 "POSITION": lambda self: self._parse_position(), 1197 "PREDICT": lambda self: self._parse_predict(), 1198 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1199 "STRING_AGG": lambda self: self._parse_string_agg(), 1200 "SUBSTRING": lambda self: self._parse_substring(), 1201 "TRIM": lambda self: self._parse_trim(), 1202 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1203 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1204 "XMLELEMENT": lambda self: self.expression( 1205 exp.XMLElement, 1206 this=self._match_text_seq("NAME") and self._parse_id_var(), 1207 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1208 ), 1209 "XMLTABLE": lambda self: self._parse_xml_table(), 1210 } 1211 1212 QUERY_MODIFIER_PARSERS = { 1213 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1214 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1215 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1216 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1217 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1218 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1219 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1220 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1221 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1222 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1223 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1224 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1225 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1226 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1227 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1228 TokenType.CLUSTER_BY: lambda self: ( 1229 "cluster", 1230 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1231 ), 1232 TokenType.DISTRIBUTE_BY: lambda self: ( 1233 "distribute", 1234 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1235 ), 1236 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1237 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1238 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1239 } 1240 1241 SET_PARSERS = { 1242 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1243 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1244 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1245 "TRANSACTION": lambda self: self._parse_set_transaction(), 1246 } 1247 1248 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1249 1250 TYPE_LITERAL_PARSERS = { 1251 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1252 } 1253 1254 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1255 1256 DDL_SELECT_TOKENS = 
{TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1257 1258 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1259 1260 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1261 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1262 "ISOLATION": ( 1263 ("LEVEL", "REPEATABLE", "READ"), 1264 ("LEVEL", "READ", "COMMITTED"), 1265 ("LEVEL", "READ", "UNCOMITTED"), 1266 ("LEVEL", "SERIALIZABLE"), 1267 ), 1268 "READ": ("WRITE", "ONLY"), 1269 } 1270 1271 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1272 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1273 ) 1274 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1275 1276 CREATE_SEQUENCE: OPTIONS_TYPE = { 1277 "SCALE": ("EXTEND", "NOEXTEND"), 1278 "SHARD": ("EXTEND", "NOEXTEND"), 1279 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1280 **dict.fromkeys( 1281 ( 1282 "SESSION", 1283 "GLOBAL", 1284 "KEEP", 1285 "NOKEEP", 1286 "ORDER", 1287 "NOORDER", 1288 "NOCACHE", 1289 "CYCLE", 1290 "NOCYCLE", 1291 "NOMINVALUE", 1292 "NOMAXVALUE", 1293 "NOSCALE", 1294 "NOSHARD", 1295 ), 1296 tuple(), 1297 ), 1298 } 1299 1300 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1301 1302 USABLES: OPTIONS_TYPE = dict.fromkeys( 1303 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1304 ) 1305 1306 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1307 1308 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1309 "TYPE": ("EVOLUTION",), 1310 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1311 } 1312 1313 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1314 1315 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1316 1317 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1318 "NOT": ("ENFORCED",), 1319 "MATCH": ( 1320 "FULL", 1321 "PARTIAL", 1322 "SIMPLE", 1323 ), 1324 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1325 "USING": ( 1326 "BTREE", 1327 "HASH", 1328 ), 1329 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1330 } 1331 1332 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1333 1334 CLONE_KEYWORDS = {"CLONE", "COPY"} 1335 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1336 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1337 1338 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1339 1340 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1341 1342 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1343 1344 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1345 1346 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1347 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1348 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1349 1350 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1351 1352 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1353 1354 ADD_CONSTRAINT_TOKENS = { 1355 TokenType.CONSTRAINT, 1356 TokenType.FOREIGN_KEY, 1357 TokenType.INDEX, 1358 TokenType.KEY, 1359 TokenType.PRIMARY_KEY, 1360 TokenType.UNIQUE, 1361 } 1362 1363 DISTINCT_TOKENS = {TokenType.DISTINCT} 1364 1365 NULL_TOKENS = {TokenType.NULL} 1366 1367 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1368 1369 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1370 1371 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1372 1373 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1374 
    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
1666 """ 1667 instance = exp_class(**kwargs) 1668 instance.add_comments(comments) if comments else self._add_comments(instance) 1669 return self.validate_expression(instance) 1670 1671 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1672 if expression and self._prev_comments: 1673 expression.add_comments(self._prev_comments) 1674 self._prev_comments = None 1675 1676 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1677 """ 1678 Validates an Expression, making sure that all its mandatory arguments are set. 1679 1680 Args: 1681 expression: The expression to validate. 1682 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1683 1684 Returns: 1685 The validated expression. 1686 """ 1687 if self.error_level != ErrorLevel.IGNORE: 1688 for error_message in expression.error_messages(args): 1689 self.raise_error(error_message) 1690 1691 return expression 1692 1693 def _find_sql(self, start: Token, end: Token) -> str: 1694 return self.sql[start.start : end.end + 1] 1695 1696 def _is_connected(self) -> bool: 1697 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1698 1699 def _advance(self, times: int = 1) -> None: 1700 self._index += times 1701 self._curr = seq_get(self._tokens, self._index) 1702 self._next = seq_get(self._tokens, self._index + 1) 1703 1704 if self._index > 0: 1705 self._prev = self._tokens[self._index - 1] 1706 self._prev_comments = self._prev.comments 1707 else: 1708 self._prev = None 1709 self._prev_comments = None 1710 1711 def _retreat(self, index: int) -> None: 1712 if index != self._index: 1713 self._advance(index - self._index) 1714 1715 def _warn_unsupported(self) -> None: 1716 if len(self._tokens) <= 1: 1717 return 1718 1719 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1720 # interested in emitting a warning for the one being currently processed. 1721 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1722 1723 logger.warning( 1724 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1725 ) 1726 1727 def _parse_command(self) -> exp.Command: 1728 self._warn_unsupported() 1729 return self.expression( 1730 exp.Command, 1731 comments=self._prev_comments, 1732 this=self._prev.text.upper(), 1733 expression=self._parse_string(), 1734 ) 1735 1736 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1737 """ 1738 Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
1739 This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1740 solve this by setting & resetting the parser state accordingly 1741 """ 1742 index = self._index 1743 error_level = self.error_level 1744 1745 self.error_level = ErrorLevel.IMMEDIATE 1746 try: 1747 this = parse_method() 1748 except ParseError: 1749 this = None 1750 finally: 1751 if not this or retreat: 1752 self._retreat(index) 1753 self.error_level = error_level 1754 1755 return this 1756 1757 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1758 start = self._prev 1759 exists = self._parse_exists() if allow_exists else None 1760 1761 self._match(TokenType.ON) 1762 1763 materialized = self._match_text_seq("MATERIALIZED") 1764 kind = self._match_set(self.CREATABLES) and self._prev 1765 if not kind: 1766 return self._parse_as_command(start) 1767 1768 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1769 this = self._parse_user_defined_function(kind=kind.token_type) 1770 elif kind.token_type == TokenType.TABLE: 1771 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1772 elif kind.token_type == TokenType.COLUMN: 1773 this = self._parse_column() 1774 else: 1775 this = self._parse_id_var() 1776 1777 self._match(TokenType.IS) 1778 1779 return self.expression( 1780 exp.Comment, 1781 this=this, 1782 kind=kind.text, 1783 expression=self._parse_string(), 1784 exists=exists, 1785 materialized=materialized, 1786 ) 1787 1788 def _parse_to_table( 1789 self, 1790 ) -> exp.ToTableProperty: 1791 table = self._parse_table_parts(schema=True) 1792 return self.expression(exp.ToTableProperty, this=table) 1793 1794 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1795 def _parse_ttl(self) -> exp.Expression: 1796 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1797 this = self._parse_bitwise() 1798 1799 if self._match_text_seq("DELETE"): 1800 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1801 if self._match_text_seq("RECOMPRESS"): 1802 return self.expression( 1803 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1804 ) 1805 if self._match_text_seq("TO", "DISK"): 1806 return self.expression( 1807 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1808 ) 1809 if self._match_text_seq("TO", "VOLUME"): 1810 return self.expression( 1811 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1812 ) 1813 1814 return this 1815 1816 expressions = self._parse_csv(_parse_ttl_action) 1817 where = self._parse_where() 1818 group = self._parse_group() 1819 1820 aggregates = None 1821 if group and self._match(TokenType.SET): 1822 aggregates = self._parse_csv(self._parse_set_item) 1823 1824 return self.expression( 1825 exp.MergeTreeTTL, 1826 expressions=expressions, 1827 where=where, 1828 group=group, 1829 aggregates=aggregates, 1830 ) 1831 1832 def _parse_statement(self) -> t.Optional[exp.Expression]: 1833 if self._curr is None: 1834 return None 1835 1836 if self._match_set(self.STATEMENT_PARSERS): 1837 comments = self._prev_comments 1838 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1839 stmt.add_comments(comments, prepend=True) 1840 return stmt 1841 1842 if self._match_set(self.dialect.tokenizer.COMMANDS): 1843 return self._parse_command() 1844 1845 expression = self._parse_expression() 1846 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1847 return self._parse_query_modifiers(expression)
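    # Illustrative sketch of how _parse_statement dispatches a single statement (assuming the
    # default dialect): a leading token found in STATEMENT_PARSERS (e.g. DROP) is routed to its
    # dedicated parser, tokens registered as tokenizer COMMANDS fall back to _parse_command(),
    # and anything else is parsed as an expression / SELECT.
    #
    #   parser = Parser()
    #   tokens = Tokenizer().tokenize("DROP TABLE t")
    #   [tree] = parser.parse(tokens, sql="DROP TABLE t")
    #   assert isinstance(tree, exp.Drop)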
1848 1849 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1850 start = self._prev 1851 temporary = self._match(TokenType.TEMPORARY) 1852 materialized = self._match_text_seq("MATERIALIZED") 1853 1854 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1855 if not kind: 1856 return self._parse_as_command(start) 1857 1858 concurrently = self._match_text_seq("CONCURRENTLY") 1859 if_exists = exists or self._parse_exists() 1860 1861 if kind == "COLUMN": 1862 this = self._parse_column() 1863 else: 1864 this = self._parse_table_parts( 1865 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1866 ) 1867 1868 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1869 1870 if self._match(TokenType.L_PAREN, advance=False): 1871 expressions = self._parse_wrapped_csv(self._parse_types) 1872 else: 1873 expressions = None 1874 1875 return self.expression( 1876 exp.Drop, 1877 exists=if_exists, 1878 this=this, 1879 expressions=expressions, 1880 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1881 temporary=temporary, 1882 materialized=materialized, 1883 cascade=self._match_text_seq("CASCADE"), 1884 constraints=self._match_text_seq("CONSTRAINTS"), 1885 purge=self._match_text_seq("PURGE"), 1886 cluster=cluster, 1887 concurrently=concurrently, 1888 ) 1889 1890 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1891 return ( 1892 self._match_text_seq("IF") 1893 and (not not_ or self._match(TokenType.NOT)) 1894 and self._match(TokenType.EXISTS) 1895 ) 1896 1897 def _parse_create(self) -> exp.Create | exp.Command: 1898 # Note: this can't be None because we've matched a statement parser 1899 start = self._prev 1900 1901 replace = ( 1902 start.token_type == TokenType.REPLACE 1903 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1904 or self._match_pair(TokenType.OR, TokenType.ALTER) 1905 ) 1906 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1907 1908 unique = self._match(TokenType.UNIQUE) 1909 1910 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1911 clustered = True 1912 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1913 "COLUMNSTORE" 1914 ): 1915 clustered = False 1916 else: 1917 clustered = None 1918 1919 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1920 self._advance() 1921 1922 properties = None 1923 create_token = self._match_set(self.CREATABLES) and self._prev 1924 1925 if not create_token: 1926 # exp.Properties.Location.POST_CREATE 1927 properties = self._parse_properties() 1928 create_token = self._match_set(self.CREATABLES) and self._prev 1929 1930 if not properties or not create_token: 1931 return self._parse_as_command(start) 1932 1933 concurrently = self._match_text_seq("CONCURRENTLY") 1934 exists = self._parse_exists(not_=True) 1935 this = None 1936 expression: t.Optional[exp.Expression] = None 1937 indexes = None 1938 no_schema_binding = None 1939 begin = None 1940 end = None 1941 clone = None 1942 1943 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1944 nonlocal properties 1945 if properties and temp_props: 1946 properties.expressions.extend(temp_props.expressions) 1947 elif temp_props: 1948 properties = temp_props 1949 1950 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1951 this = self._parse_user_defined_function(kind=create_token.token_type) 1952 1953 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1954 
extend_props(self._parse_properties()) 1955 1956 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1957 extend_props(self._parse_properties()) 1958 1959 if not expression: 1960 if self._match(TokenType.COMMAND): 1961 expression = self._parse_as_command(self._prev) 1962 else: 1963 begin = self._match(TokenType.BEGIN) 1964 return_ = self._match_text_seq("RETURN") 1965 1966 if self._match(TokenType.STRING, advance=False): 1967 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1968 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1969 expression = self._parse_string() 1970 extend_props(self._parse_properties()) 1971 else: 1972 expression = self._parse_user_defined_function_expression() 1973 1974 end = self._match_text_seq("END") 1975 1976 if return_: 1977 expression = self.expression(exp.Return, this=expression) 1978 elif create_token.token_type == TokenType.INDEX: 1979 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1980 if not self._match(TokenType.ON): 1981 index = self._parse_id_var() 1982 anonymous = False 1983 else: 1984 index = None 1985 anonymous = True 1986 1987 this = self._parse_index(index=index, anonymous=anonymous) 1988 elif create_token.token_type in self.DB_CREATABLES: 1989 table_parts = self._parse_table_parts( 1990 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1991 ) 1992 1993 # exp.Properties.Location.POST_NAME 1994 self._match(TokenType.COMMA) 1995 extend_props(self._parse_properties(before=True)) 1996 1997 this = self._parse_schema(this=table_parts) 1998 1999 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2000 extend_props(self._parse_properties()) 2001 2002 self._match(TokenType.ALIAS) 2003 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2004 # exp.Properties.Location.POST_ALIAS 2005 extend_props(self._parse_properties()) 2006 2007 if create_token.token_type == TokenType.SEQUENCE: 2008 expression = self._parse_types() 2009 extend_props(self._parse_properties()) 2010 else: 2011 expression = self._parse_ddl_select() 2012 2013 if create_token.token_type == TokenType.TABLE: 2014 # exp.Properties.Location.POST_EXPRESSION 2015 extend_props(self._parse_properties()) 2016 2017 indexes = [] 2018 while True: 2019 index = self._parse_index() 2020 2021 # exp.Properties.Location.POST_INDEX 2022 extend_props(self._parse_properties()) 2023 if not index: 2024 break 2025 else: 2026 self._match(TokenType.COMMA) 2027 indexes.append(index) 2028 elif create_token.token_type == TokenType.VIEW: 2029 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2030 no_schema_binding = True 2031 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2032 extend_props(self._parse_properties()) 2033 2034 shallow = self._match_text_seq("SHALLOW") 2035 2036 if self._match_texts(self.CLONE_KEYWORDS): 2037 copy = self._prev.text.lower() == "copy" 2038 clone = self.expression( 2039 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2040 ) 2041 2042 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2043 return self._parse_as_command(start) 2044 2045 create_kind_text = create_token.text.upper() 2046 return self.expression( 2047 exp.Create, 2048 this=this, 2049 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2050 replace=replace, 2051 refresh=refresh, 2052 unique=unique, 2053 expression=expression, 
2054 exists=exists, 2055 properties=properties, 2056 indexes=indexes, 2057 no_schema_binding=no_schema_binding, 2058 begin=begin, 2059 end=end, 2060 clone=clone, 2061 concurrently=concurrently, 2062 clustered=clustered, 2063 ) 2064 2065 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2066 seq = exp.SequenceProperties() 2067 2068 options = [] 2069 index = self._index 2070 2071 while self._curr: 2072 self._match(TokenType.COMMA) 2073 if self._match_text_seq("INCREMENT"): 2074 self._match_text_seq("BY") 2075 self._match_text_seq("=") 2076 seq.set("increment", self._parse_term()) 2077 elif self._match_text_seq("MINVALUE"): 2078 seq.set("minvalue", self._parse_term()) 2079 elif self._match_text_seq("MAXVALUE"): 2080 seq.set("maxvalue", self._parse_term()) 2081 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2082 self._match_text_seq("=") 2083 seq.set("start", self._parse_term()) 2084 elif self._match_text_seq("CACHE"): 2085 # T-SQL allows empty CACHE which is initialized dynamically 2086 seq.set("cache", self._parse_number() or True) 2087 elif self._match_text_seq("OWNED", "BY"): 2088 # "OWNED BY NONE" is the default 2089 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2090 else: 2091 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2092 if opt: 2093 options.append(opt) 2094 else: 2095 break 2096 2097 seq.set("options", options if options else None) 2098 return None if self._index == index else seq 2099 2100 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2101 # only used for teradata currently 2102 self._match(TokenType.COMMA) 2103 2104 kwargs = { 2105 "no": self._match_text_seq("NO"), 2106 "dual": self._match_text_seq("DUAL"), 2107 "before": self._match_text_seq("BEFORE"), 2108 "default": self._match_text_seq("DEFAULT"), 2109 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2110 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2111 "after": self._match_text_seq("AFTER"), 2112 "minimum": self._match_texts(("MIN", "MINIMUM")), 2113 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2114 } 2115 2116 if self._match_texts(self.PROPERTY_PARSERS): 2117 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2118 try: 2119 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2120 except TypeError: 2121 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2122 2123 return None 2124 2125 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2126 return self._parse_wrapped_csv(self._parse_property) 2127 2128 def _parse_property(self) -> t.Optional[exp.Expression]: 2129 if self._match_texts(self.PROPERTY_PARSERS): 2130 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2131 2132 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2133 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2134 2135 if self._match_text_seq("COMPOUND", "SORTKEY"): 2136 return self._parse_sortkey(compound=True) 2137 2138 if self._match_text_seq("SQL", "SECURITY"): 2139 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2140 2141 index = self._index 2142 key = self._parse_column() 2143 2144 if not self._match(TokenType.EQ): 2145 self._retreat(index) 2146 return self._parse_sequence_properties() 2147 2148 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2149 if isinstance(key, exp.Column): 2150 key = 
key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2151 2152 value = self._parse_bitwise() or self._parse_var(any_token=True) 2153 2154 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2155 if isinstance(value, exp.Column): 2156 value = exp.var(value.name) 2157 2158 return self.expression(exp.Property, this=key, value=value) 2159 2160 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2161 if self._match_text_seq("BY"): 2162 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2163 2164 self._match(TokenType.ALIAS) 2165 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2166 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2167 2168 return self.expression( 2169 exp.FileFormatProperty, 2170 this=( 2171 self.expression( 2172 exp.InputOutputFormat, 2173 input_format=input_format, 2174 output_format=output_format, 2175 ) 2176 if input_format or output_format 2177 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2178 ), 2179 ) 2180 2181 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2182 field = self._parse_field() 2183 if isinstance(field, exp.Identifier) and not field.quoted: 2184 field = exp.var(field) 2185 2186 return field 2187 2188 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2189 self._match(TokenType.EQ) 2190 self._match(TokenType.ALIAS) 2191 2192 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2193 2194 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2195 properties = [] 2196 while True: 2197 if before: 2198 prop = self._parse_property_before() 2199 else: 2200 prop = self._parse_property() 2201 if not prop: 2202 break 2203 for p in ensure_list(prop): 2204 properties.append(p) 2205 2206 if properties: 2207 return self.expression(exp.Properties, expressions=properties) 2208 2209 return None 2210 2211 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2212 return self.expression( 2213 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2214 ) 2215 2216 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2217 if self._match_texts(("DEFINER", "INVOKER")): 2218 security_specifier = self._prev.text.upper() 2219 return self.expression(exp.SecurityProperty, this=security_specifier) 2220 return None 2221 2222 def _parse_settings_property(self) -> exp.SettingsProperty: 2223 return self.expression( 2224 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2225 ) 2226 2227 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2228 if self._index >= 2: 2229 pre_volatile_token = self._tokens[self._index - 2] 2230 else: 2231 pre_volatile_token = None 2232 2233 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2234 return exp.VolatileProperty() 2235 2236 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2237 2238 def _parse_retention_period(self) -> exp.Var: 2239 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2240 number = self._parse_number() 2241 number_str = f"{number} " if number else "" 2242 unit = self._parse_var(any_token=True) 2243 return exp.var(f"{number_str}{unit}") 2244 2245 def _parse_system_versioning_property( 2246 self, with_: bool = False 2247 ) 
-> exp.WithSystemVersioningProperty: 2248 self._match(TokenType.EQ) 2249 prop = self.expression( 2250 exp.WithSystemVersioningProperty, 2251 **{ # type: ignore 2252 "on": True, 2253 "with": with_, 2254 }, 2255 ) 2256 2257 if self._match_text_seq("OFF"): 2258 prop.set("on", False) 2259 return prop 2260 2261 self._match(TokenType.ON) 2262 if self._match(TokenType.L_PAREN): 2263 while self._curr and not self._match(TokenType.R_PAREN): 2264 if self._match_text_seq("HISTORY_TABLE", "="): 2265 prop.set("this", self._parse_table_parts()) 2266 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2267 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2268 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2269 prop.set("retention_period", self._parse_retention_period()) 2270 2271 self._match(TokenType.COMMA) 2272 2273 return prop 2274 2275 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2276 self._match(TokenType.EQ) 2277 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2278 prop = self.expression(exp.DataDeletionProperty, on=on) 2279 2280 if self._match(TokenType.L_PAREN): 2281 while self._curr and not self._match(TokenType.R_PAREN): 2282 if self._match_text_seq("FILTER_COLUMN", "="): 2283 prop.set("filter_column", self._parse_column()) 2284 elif self._match_text_seq("RETENTION_PERIOD", "="): 2285 prop.set("retention_period", self._parse_retention_period()) 2286 2287 self._match(TokenType.COMMA) 2288 2289 return prop 2290 2291 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2292 kind = "HASH" 2293 expressions: t.Optional[t.List[exp.Expression]] = None 2294 if self._match_text_seq("BY", "HASH"): 2295 expressions = self._parse_wrapped_csv(self._parse_id_var) 2296 elif self._match_text_seq("BY", "RANDOM"): 2297 kind = "RANDOM" 2298 2299 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2300 buckets: t.Optional[exp.Expression] = None 2301 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2302 buckets = self._parse_number() 2303 2304 return self.expression( 2305 exp.DistributedByProperty, 2306 expressions=expressions, 2307 kind=kind, 2308 buckets=buckets, 2309 order=self._parse_order(), 2310 ) 2311 2312 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2313 self._match_text_seq("KEY") 2314 expressions = self._parse_wrapped_id_vars() 2315 return self.expression(expr_type, expressions=expressions) 2316 2317 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2318 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2319 prop = self._parse_system_versioning_property(with_=True) 2320 self._match_r_paren() 2321 return prop 2322 2323 if self._match(TokenType.L_PAREN, advance=False): 2324 return self._parse_wrapped_properties() 2325 2326 if self._match_text_seq("JOURNAL"): 2327 return self._parse_withjournaltable() 2328 2329 if self._match_texts(self.VIEW_ATTRIBUTES): 2330 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2331 2332 if self._match_text_seq("DATA"): 2333 return self._parse_withdata(no=False) 2334 elif self._match_text_seq("NO", "DATA"): 2335 return self._parse_withdata(no=True) 2336 2337 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2338 return self._parse_serde_properties(with_=True) 2339 2340 if self._match(TokenType.SCHEMA): 2341 return self.expression( 2342 exp.WithSchemaBindingProperty, 2343 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 
2344 ) 2345 2346 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2347 return self.expression( 2348 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2349 ) 2350 2351 if not self._next: 2352 return None 2353 2354 return self._parse_withisolatedloading() 2355 2356 def _parse_procedure_option(self) -> exp.Expression | None: 2357 if self._match_text_seq("EXECUTE", "AS"): 2358 return self.expression( 2359 exp.ExecuteAsProperty, 2360 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2361 or self._parse_string(), 2362 ) 2363 2364 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2365 2366 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2367 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2368 self._match(TokenType.EQ) 2369 2370 user = self._parse_id_var() 2371 self._match(TokenType.PARAMETER) 2372 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2373 2374 if not user or not host: 2375 return None 2376 2377 return exp.DefinerProperty(this=f"{user}@{host}") 2378 2379 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2380 self._match(TokenType.TABLE) 2381 self._match(TokenType.EQ) 2382 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2383 2384 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2385 return self.expression(exp.LogProperty, no=no) 2386 2387 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2388 return self.expression(exp.JournalProperty, **kwargs) 2389 2390 def _parse_checksum(self) -> exp.ChecksumProperty: 2391 self._match(TokenType.EQ) 2392 2393 on = None 2394 if self._match(TokenType.ON): 2395 on = True 2396 elif self._match_text_seq("OFF"): 2397 on = False 2398 2399 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2400 2401 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2402 return self.expression( 2403 exp.Cluster, 2404 expressions=( 2405 self._parse_wrapped_csv(self._parse_ordered) 2406 if wrapped 2407 else self._parse_csv(self._parse_ordered) 2408 ), 2409 ) 2410 2411 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2412 self._match_text_seq("BY") 2413 2414 self._match_l_paren() 2415 expressions = self._parse_csv(self._parse_column) 2416 self._match_r_paren() 2417 2418 if self._match_text_seq("SORTED", "BY"): 2419 self._match_l_paren() 2420 sorted_by = self._parse_csv(self._parse_ordered) 2421 self._match_r_paren() 2422 else: 2423 sorted_by = None 2424 2425 self._match(TokenType.INTO) 2426 buckets = self._parse_number() 2427 self._match_text_seq("BUCKETS") 2428 2429 return self.expression( 2430 exp.ClusteredByProperty, 2431 expressions=expressions, 2432 sorted_by=sorted_by, 2433 buckets=buckets, 2434 ) 2435 2436 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2437 if not self._match_text_seq("GRANTS"): 2438 self._retreat(self._index - 1) 2439 return None 2440 2441 return self.expression(exp.CopyGrantsProperty) 2442 2443 def _parse_freespace(self) -> exp.FreespaceProperty: 2444 self._match(TokenType.EQ) 2445 return self.expression( 2446 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2447 ) 2448 2449 def _parse_mergeblockratio( 2450 self, no: bool = False, default: bool = False 2451 ) -> exp.MergeBlockRatioProperty: 2452 if self._match(TokenType.EQ): 2453 return self.expression( 2454 exp.MergeBlockRatioProperty, 2455 
this=self._parse_number(), 2456 percent=self._match(TokenType.PERCENT), 2457 ) 2458 2459 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2460 2461 def _parse_datablocksize( 2462 self, 2463 default: t.Optional[bool] = None, 2464 minimum: t.Optional[bool] = None, 2465 maximum: t.Optional[bool] = None, 2466 ) -> exp.DataBlocksizeProperty: 2467 self._match(TokenType.EQ) 2468 size = self._parse_number() 2469 2470 units = None 2471 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2472 units = self._prev.text 2473 2474 return self.expression( 2475 exp.DataBlocksizeProperty, 2476 size=size, 2477 units=units, 2478 default=default, 2479 minimum=minimum, 2480 maximum=maximum, 2481 ) 2482 2483 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2484 self._match(TokenType.EQ) 2485 always = self._match_text_seq("ALWAYS") 2486 manual = self._match_text_seq("MANUAL") 2487 never = self._match_text_seq("NEVER") 2488 default = self._match_text_seq("DEFAULT") 2489 2490 autotemp = None 2491 if self._match_text_seq("AUTOTEMP"): 2492 autotemp = self._parse_schema() 2493 2494 return self.expression( 2495 exp.BlockCompressionProperty, 2496 always=always, 2497 manual=manual, 2498 never=never, 2499 default=default, 2500 autotemp=autotemp, 2501 ) 2502 2503 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2504 index = self._index 2505 no = self._match_text_seq("NO") 2506 concurrent = self._match_text_seq("CONCURRENT") 2507 2508 if not self._match_text_seq("ISOLATED", "LOADING"): 2509 self._retreat(index) 2510 return None 2511 2512 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2513 return self.expression( 2514 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2515 ) 2516 2517 def _parse_locking(self) -> exp.LockingProperty: 2518 if self._match(TokenType.TABLE): 2519 kind = "TABLE" 2520 elif self._match(TokenType.VIEW): 2521 kind = "VIEW" 2522 elif self._match(TokenType.ROW): 2523 kind = "ROW" 2524 elif self._match_text_seq("DATABASE"): 2525 kind = "DATABASE" 2526 else: 2527 kind = None 2528 2529 if kind in ("DATABASE", "TABLE", "VIEW"): 2530 this = self._parse_table_parts() 2531 else: 2532 this = None 2533 2534 if self._match(TokenType.FOR): 2535 for_or_in = "FOR" 2536 elif self._match(TokenType.IN): 2537 for_or_in = "IN" 2538 else: 2539 for_or_in = None 2540 2541 if self._match_text_seq("ACCESS"): 2542 lock_type = "ACCESS" 2543 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2544 lock_type = "EXCLUSIVE" 2545 elif self._match_text_seq("SHARE"): 2546 lock_type = "SHARE" 2547 elif self._match_text_seq("READ"): 2548 lock_type = "READ" 2549 elif self._match_text_seq("WRITE"): 2550 lock_type = "WRITE" 2551 elif self._match_text_seq("CHECKSUM"): 2552 lock_type = "CHECKSUM" 2553 else: 2554 lock_type = None 2555 2556 override = self._match_text_seq("OVERRIDE") 2557 2558 return self.expression( 2559 exp.LockingProperty, 2560 this=this, 2561 kind=kind, 2562 for_or_in=for_or_in, 2563 lock_type=lock_type, 2564 override=override, 2565 ) 2566 2567 def _parse_partition_by(self) -> t.List[exp.Expression]: 2568 if self._match(TokenType.PARTITION_BY): 2569 return self._parse_csv(self._parse_assignment) 2570 return [] 2571 2572 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2573 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2574 if self._match_text_seq("MINVALUE"): 2575 return exp.var("MINVALUE") 2576 if self._match_text_seq("MAXVALUE"): 2577 return 
exp.var("MAXVALUE") 2578 return self._parse_bitwise() 2579 2580 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2581 expression = None 2582 from_expressions = None 2583 to_expressions = None 2584 2585 if self._match(TokenType.IN): 2586 this = self._parse_wrapped_csv(self._parse_bitwise) 2587 elif self._match(TokenType.FROM): 2588 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2589 self._match_text_seq("TO") 2590 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2591 elif self._match_text_seq("WITH", "(", "MODULUS"): 2592 this = self._parse_number() 2593 self._match_text_seq(",", "REMAINDER") 2594 expression = self._parse_number() 2595 self._match_r_paren() 2596 else: 2597 self.raise_error("Failed to parse partition bound spec.") 2598 2599 return self.expression( 2600 exp.PartitionBoundSpec, 2601 this=this, 2602 expression=expression, 2603 from_expressions=from_expressions, 2604 to_expressions=to_expressions, 2605 ) 2606 2607 # https://www.postgresql.org/docs/current/sql-createtable.html 2608 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2609 if not self._match_text_seq("OF"): 2610 self._retreat(self._index - 1) 2611 return None 2612 2613 this = self._parse_table(schema=True) 2614 2615 if self._match(TokenType.DEFAULT): 2616 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2617 elif self._match_text_seq("FOR", "VALUES"): 2618 expression = self._parse_partition_bound_spec() 2619 else: 2620 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2621 2622 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2623 2624 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2625 self._match(TokenType.EQ) 2626 return self.expression( 2627 exp.PartitionedByProperty, 2628 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2629 ) 2630 2631 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2632 if self._match_text_seq("AND", "STATISTICS"): 2633 statistics = True 2634 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2635 statistics = False 2636 else: 2637 statistics = None 2638 2639 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2640 2641 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2642 if self._match_text_seq("SQL"): 2643 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2644 return None 2645 2646 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2647 if self._match_text_seq("SQL", "DATA"): 2648 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2649 return None 2650 2651 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2652 if self._match_text_seq("PRIMARY", "INDEX"): 2653 return exp.NoPrimaryIndexProperty() 2654 if self._match_text_seq("SQL"): 2655 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2656 return None 2657 2658 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2659 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2660 return exp.OnCommitProperty() 2661 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2662 return exp.OnCommitProperty(delete=True) 2663 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2664 2665 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2666 if self._match_text_seq("SQL", "DATA"): 2667 return self.expression(exp.SqlReadWriteProperty, 
this="READS SQL DATA") 2668 return None 2669 2670 def _parse_distkey(self) -> exp.DistKeyProperty: 2671 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2672 2673 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2674 table = self._parse_table(schema=True) 2675 2676 options = [] 2677 while self._match_texts(("INCLUDING", "EXCLUDING")): 2678 this = self._prev.text.upper() 2679 2680 id_var = self._parse_id_var() 2681 if not id_var: 2682 return None 2683 2684 options.append( 2685 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2686 ) 2687 2688 return self.expression(exp.LikeProperty, this=table, expressions=options) 2689 2690 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2691 return self.expression( 2692 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2693 ) 2694 2695 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2696 self._match(TokenType.EQ) 2697 return self.expression( 2698 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2699 ) 2700 2701 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2702 self._match_text_seq("WITH", "CONNECTION") 2703 return self.expression( 2704 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2705 ) 2706 2707 def _parse_returns(self) -> exp.ReturnsProperty: 2708 value: t.Optional[exp.Expression] 2709 null = None 2710 is_table = self._match(TokenType.TABLE) 2711 2712 if is_table: 2713 if self._match(TokenType.LT): 2714 value = self.expression( 2715 exp.Schema, 2716 this="TABLE", 2717 expressions=self._parse_csv(self._parse_struct_types), 2718 ) 2719 if not self._match(TokenType.GT): 2720 self.raise_error("Expecting >") 2721 else: 2722 value = self._parse_schema(exp.var("TABLE")) 2723 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2724 null = True 2725 value = None 2726 else: 2727 value = self._parse_types() 2728 2729 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2730 2731 def _parse_describe(self) -> exp.Describe: 2732 kind = self._match_set(self.CREATABLES) and self._prev.text 2733 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2734 if self._match(TokenType.DOT): 2735 style = None 2736 self._retreat(self._index - 2) 2737 2738 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2739 2740 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2741 this = self._parse_statement() 2742 else: 2743 this = self._parse_table(schema=True) 2744 2745 properties = self._parse_properties() 2746 expressions = properties.expressions if properties else None 2747 partition = self._parse_partition() 2748 return self.expression( 2749 exp.Describe, 2750 this=this, 2751 style=style, 2752 kind=kind, 2753 expressions=expressions, 2754 partition=partition, 2755 format=format, 2756 ) 2757 2758 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2759 kind = self._prev.text.upper() 2760 expressions = [] 2761 2762 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2763 if self._match(TokenType.WHEN): 2764 expression = self._parse_disjunction() 2765 self._match(TokenType.THEN) 2766 else: 2767 expression = None 2768 2769 else_ = self._match(TokenType.ELSE) 2770 2771 if not self._match(TokenType.INTO): 2772 return None 2773 2774 return self.expression( 2775 
exp.ConditionalInsert, 2776 this=self.expression( 2777 exp.Insert, 2778 this=self._parse_table(schema=True), 2779 expression=self._parse_derived_table_values(), 2780 ), 2781 expression=expression, 2782 else_=else_, 2783 ) 2784 2785 expression = parse_conditional_insert() 2786 while expression is not None: 2787 expressions.append(expression) 2788 expression = parse_conditional_insert() 2789 2790 return self.expression( 2791 exp.MultitableInserts, 2792 kind=kind, 2793 comments=comments, 2794 expressions=expressions, 2795 source=self._parse_table(), 2796 ) 2797 2798 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2799 comments = [] 2800 hint = self._parse_hint() 2801 overwrite = self._match(TokenType.OVERWRITE) 2802 ignore = self._match(TokenType.IGNORE) 2803 local = self._match_text_seq("LOCAL") 2804 alternative = None 2805 is_function = None 2806 2807 if self._match_text_seq("DIRECTORY"): 2808 this: t.Optional[exp.Expression] = self.expression( 2809 exp.Directory, 2810 this=self._parse_var_or_string(), 2811 local=local, 2812 row_format=self._parse_row_format(match_row=True), 2813 ) 2814 else: 2815 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2816 comments += ensure_list(self._prev_comments) 2817 return self._parse_multitable_inserts(comments) 2818 2819 if self._match(TokenType.OR): 2820 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2821 2822 self._match(TokenType.INTO) 2823 comments += ensure_list(self._prev_comments) 2824 self._match(TokenType.TABLE) 2825 is_function = self._match(TokenType.FUNCTION) 2826 2827 this = ( 2828 self._parse_table(schema=True, parse_partition=True) 2829 if not is_function 2830 else self._parse_function() 2831 ) 2832 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2833 this.set("alias", self._parse_table_alias()) 2834 2835 returning = self._parse_returning() 2836 2837 return self.expression( 2838 exp.Insert, 2839 comments=comments, 2840 hint=hint, 2841 is_function=is_function, 2842 this=this, 2843 stored=self._match_text_seq("STORED") and self._parse_stored(), 2844 by_name=self._match_text_seq("BY", "NAME"), 2845 exists=self._parse_exists(), 2846 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2847 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2848 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2849 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2850 conflict=self._parse_on_conflict(), 2851 returning=returning or self._parse_returning(), 2852 overwrite=overwrite, 2853 alternative=alternative, 2854 ignore=ignore, 2855 source=self._match(TokenType.TABLE) and self._parse_table(), 2856 ) 2857 2858 def _parse_kill(self) -> exp.Kill: 2859 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2860 2861 return self.expression( 2862 exp.Kill, 2863 this=self._parse_primary(), 2864 kind=kind, 2865 ) 2866 2867 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2868 conflict = self._match_text_seq("ON", "CONFLICT") 2869 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2870 2871 if not conflict and not duplicate: 2872 return None 2873 2874 conflict_keys = None 2875 constraint = None 2876 2877 if conflict: 2878 if self._match_text_seq("ON", "CONSTRAINT"): 2879 constraint = self._parse_id_var() 2880 elif self._match(TokenType.L_PAREN): 2881 conflict_keys = self._parse_csv(self._parse_id_var) 2882 
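                # conflict_keys now holds the conflict target columns, e.g. the (id, name) in
                # Postgres-style ON CONFLICT (id, name) DO UPDATE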
self._match_r_paren() 2883 2884 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2885 if self._prev.token_type == TokenType.UPDATE: 2886 self._match(TokenType.SET) 2887 expressions = self._parse_csv(self._parse_equality) 2888 else: 2889 expressions = None 2890 2891 return self.expression( 2892 exp.OnConflict, 2893 duplicate=duplicate, 2894 expressions=expressions, 2895 action=action, 2896 conflict_keys=conflict_keys, 2897 constraint=constraint, 2898 where=self._parse_where(), 2899 ) 2900 2901 def _parse_returning(self) -> t.Optional[exp.Returning]: 2902 if not self._match(TokenType.RETURNING): 2903 return None 2904 return self.expression( 2905 exp.Returning, 2906 expressions=self._parse_csv(self._parse_expression), 2907 into=self._match(TokenType.INTO) and self._parse_table_part(), 2908 ) 2909 2910 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2911 if not self._match(TokenType.FORMAT): 2912 return None 2913 return self._parse_row_format() 2914 2915 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2916 index = self._index 2917 with_ = with_ or self._match_text_seq("WITH") 2918 2919 if not self._match(TokenType.SERDE_PROPERTIES): 2920 self._retreat(index) 2921 return None 2922 return self.expression( 2923 exp.SerdeProperties, 2924 **{ # type: ignore 2925 "expressions": self._parse_wrapped_properties(), 2926 "with": with_, 2927 }, 2928 ) 2929 2930 def _parse_row_format( 2931 self, match_row: bool = False 2932 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2933 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2934 return None 2935 2936 if self._match_text_seq("SERDE"): 2937 this = self._parse_string() 2938 2939 serde_properties = self._parse_serde_properties() 2940 2941 return self.expression( 2942 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2943 ) 2944 2945 self._match_text_seq("DELIMITED") 2946 2947 kwargs = {} 2948 2949 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2950 kwargs["fields"] = self._parse_string() 2951 if self._match_text_seq("ESCAPED", "BY"): 2952 kwargs["escaped"] = self._parse_string() 2953 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2954 kwargs["collection_items"] = self._parse_string() 2955 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2956 kwargs["map_keys"] = self._parse_string() 2957 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2958 kwargs["lines"] = self._parse_string() 2959 if self._match_text_seq("NULL", "DEFINED", "AS"): 2960 kwargs["null"] = self._parse_string() 2961 2962 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2963 2964 def _parse_load(self) -> exp.LoadData | exp.Command: 2965 if self._match_text_seq("DATA"): 2966 local = self._match_text_seq("LOCAL") 2967 self._match_text_seq("INPATH") 2968 inpath = self._parse_string() 2969 overwrite = self._match(TokenType.OVERWRITE) 2970 self._match_pair(TokenType.INTO, TokenType.TABLE) 2971 2972 return self.expression( 2973 exp.LoadData, 2974 this=self._parse_table(schema=True), 2975 local=local, 2976 overwrite=overwrite, 2977 inpath=inpath, 2978 partition=self._parse_partition(), 2979 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2980 serde=self._match_text_seq("SERDE") and self._parse_string(), 2981 ) 2982 return self._parse_as_command(self._prev) 2983 2984 def _parse_delete(self) -> exp.Delete: 2985 # This handles 
MySQL's "Multiple-Table Syntax" 2986 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2987 tables = None 2988 if not self._match(TokenType.FROM, advance=False): 2989 tables = self._parse_csv(self._parse_table) or None 2990 2991 returning = self._parse_returning() 2992 2993 return self.expression( 2994 exp.Delete, 2995 tables=tables, 2996 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2997 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2998 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2999 where=self._parse_where(), 3000 returning=returning or self._parse_returning(), 3001 limit=self._parse_limit(), 3002 ) 3003 3004 def _parse_update(self) -> exp.Update: 3005 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3006 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3007 returning = self._parse_returning() 3008 return self.expression( 3009 exp.Update, 3010 **{ # type: ignore 3011 "this": this, 3012 "expressions": expressions, 3013 "from": self._parse_from(joins=True), 3014 "where": self._parse_where(), 3015 "returning": returning or self._parse_returning(), 3016 "order": self._parse_order(), 3017 "limit": self._parse_limit(), 3018 }, 3019 ) 3020 3021 def _parse_use(self) -> exp.Use: 3022 return self.expression( 3023 exp.Use, 3024 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3025 this=self._parse_table(schema=False), 3026 ) 3027 3028 def _parse_uncache(self) -> exp.Uncache: 3029 if not self._match(TokenType.TABLE): 3030 self.raise_error("Expecting TABLE after UNCACHE") 3031 3032 return self.expression( 3033 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3034 ) 3035 3036 def _parse_cache(self) -> exp.Cache: 3037 lazy = self._match_text_seq("LAZY") 3038 self._match(TokenType.TABLE) 3039 table = self._parse_table(schema=True) 3040 3041 options = [] 3042 if self._match_text_seq("OPTIONS"): 3043 self._match_l_paren() 3044 k = self._parse_string() 3045 self._match(TokenType.EQ) 3046 v = self._parse_string() 3047 options = [k, v] 3048 self._match_r_paren() 3049 3050 self._match(TokenType.ALIAS) 3051 return self.expression( 3052 exp.Cache, 3053 this=table, 3054 lazy=lazy, 3055 options=options, 3056 expression=self._parse_select(nested=True), 3057 ) 3058 3059 def _parse_partition(self) -> t.Optional[exp.Partition]: 3060 if not self._match_texts(self.PARTITION_KEYWORDS): 3061 return None 3062 3063 return self.expression( 3064 exp.Partition, 3065 subpartition=self._prev.text.upper() == "SUBPARTITION", 3066 expressions=self._parse_wrapped_csv(self._parse_assignment), 3067 ) 3068 3069 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3070 def _parse_value_expression() -> t.Optional[exp.Expression]: 3071 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3072 return exp.var(self._prev.text.upper()) 3073 return self._parse_expression() 3074 3075 if self._match(TokenType.L_PAREN): 3076 expressions = self._parse_csv(_parse_value_expression) 3077 self._match_r_paren() 3078 return self.expression(exp.Tuple, expressions=expressions) 3079 3080 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
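        # Each bare value below is wrapped in a single-element exp.Tuple, so every row is
        # represented uniformly as a tuple.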
3081 expression = self._parse_expression() 3082 if expression: 3083 return self.expression(exp.Tuple, expressions=[expression]) 3084 return None 3085 3086 def _parse_projections(self) -> t.List[exp.Expression]: 3087 return self._parse_expressions() 3088 3089 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3090 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3091 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3092 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3093 ) 3094 elif self._match(TokenType.FROM): 3095 from_ = self._parse_from(skip_from_token=True) 3096 # Support parentheses for duckdb FROM-first syntax 3097 select = self._parse_select() 3098 if select: 3099 select.set("from", from_) 3100 this = select 3101 else: 3102 this = exp.select("*").from_(t.cast(exp.From, from_)) 3103 else: 3104 this = ( 3105 self._parse_table() 3106 if table 3107 else self._parse_select(nested=True, parse_set_operation=False) 3108 ) 3109 3110 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3111 # in case a modifier (e.g. join) is following 3112 if table and isinstance(this, exp.Values) and this.alias: 3113 alias = this.args["alias"].pop() 3114 this = exp.Table(this=this, alias=alias) 3115 3116 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3117 3118 return this 3119 3120 def _parse_select( 3121 self, 3122 nested: bool = False, 3123 table: bool = False, 3124 parse_subquery_alias: bool = True, 3125 parse_set_operation: bool = True, 3126 ) -> t.Optional[exp.Expression]: 3127 cte = self._parse_with() 3128 3129 if cte: 3130 this = self._parse_statement() 3131 3132 if not this: 3133 self.raise_error("Failed to parse any statement following CTE") 3134 return cte 3135 3136 if "with" in this.arg_types: 3137 this.set("with", cte) 3138 else: 3139 self.raise_error(f"{this.key} does not support CTE") 3140 this = cte 3141 3142 return this 3143 3144 # duckdb supports leading with FROM x 3145 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3146 3147 if self._match(TokenType.SELECT): 3148 comments = self._prev_comments 3149 3150 hint = self._parse_hint() 3151 3152 if self._next and not self._next.token_type == TokenType.DOT: 3153 all_ = self._match(TokenType.ALL) 3154 distinct = self._match_set(self.DISTINCT_TOKENS) 3155 else: 3156 all_, distinct = None, None 3157 3158 kind = ( 3159 self._match(TokenType.ALIAS) 3160 and self._match_texts(("STRUCT", "VALUE")) 3161 and self._prev.text.upper() 3162 ) 3163 3164 if distinct: 3165 distinct = self.expression( 3166 exp.Distinct, 3167 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3168 ) 3169 3170 if all_ and distinct: 3171 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3172 3173 operation_modifiers = [] 3174 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3175 operation_modifiers.append(exp.var(self._prev.text.upper())) 3176 3177 limit = self._parse_limit(top=True) 3178 projections = self._parse_projections() 3179 3180 this = self.expression( 3181 exp.Select, 3182 kind=kind, 3183 hint=hint, 3184 distinct=distinct, 3185 expressions=projections, 3186 limit=limit, 3187 operation_modifiers=operation_modifiers or None, 3188 ) 3189 this.comments = comments 3190 3191 into = self._parse_into() 3192 if into: 3193 this.set("into", into) 3194 3195 if not from_: 3196 from_ = self._parse_from() 3197 3198 if from_: 3199 this.set("from", from_) 3200 3201 this = 
self._parse_query_modifiers(this) 3202 elif (table or nested) and self._match(TokenType.L_PAREN): 3203 this = self._parse_wrapped_select(table=table) 3204 3205 # We return early here so that the UNION isn't attached to the subquery by the 3206 # following call to _parse_set_operations, but instead becomes the parent node 3207 self._match_r_paren() 3208 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3209 elif self._match(TokenType.VALUES, advance=False): 3210 this = self._parse_derived_table_values() 3211 elif from_: 3212 this = exp.select("*").from_(from_.this, copy=False) 3213 elif self._match(TokenType.SUMMARIZE): 3214 table = self._match(TokenType.TABLE) 3215 this = self._parse_select() or self._parse_string() or self._parse_table() 3216 return self.expression(exp.Summarize, this=this, table=table) 3217 elif self._match(TokenType.DESCRIBE): 3218 this = self._parse_describe() 3219 elif self._match_text_seq("STREAM"): 3220 this = self._parse_function() 3221 if this: 3222 this = self.expression(exp.Stream, this=this) 3223 else: 3224 self._retreat(self._index - 1) 3225 else: 3226 this = None 3227 3228 return self._parse_set_operations(this) if parse_set_operation else this 3229 3230 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3231 self._match_text_seq("SEARCH") 3232 3233 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3234 3235 if not kind: 3236 return None 3237 3238 self._match_text_seq("FIRST", "BY") 3239 3240 return self.expression( 3241 exp.RecursiveWithSearch, 3242 kind=kind, 3243 this=self._parse_id_var(), 3244 expression=self._match_text_seq("SET") and self._parse_id_var(), 3245 using=self._match_text_seq("USING") and self._parse_id_var(), 3246 ) 3247 3248 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3249 if not skip_with_token and not self._match(TokenType.WITH): 3250 return None 3251 3252 comments = self._prev_comments 3253 recursive = self._match(TokenType.RECURSIVE) 3254 3255 last_comments = None 3256 expressions = [] 3257 while True: 3258 cte = self._parse_cte() 3259 if isinstance(cte, exp.CTE): 3260 expressions.append(cte) 3261 if last_comments: 3262 cte.add_comments(last_comments) 3263 3264 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3265 break 3266 else: 3267 self._match(TokenType.WITH) 3268 3269 last_comments = self._prev_comments 3270 3271 return self.expression( 3272 exp.With, 3273 comments=comments, 3274 expressions=expressions, 3275 recursive=recursive, 3276 search=self._parse_recursive_with_search(), 3277 ) 3278 3279 def _parse_cte(self) -> t.Optional[exp.CTE]: 3280 index = self._index 3281 3282 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3283 if not alias or not alias.this: 3284 self.raise_error("Expected CTE to have alias") 3285 3286 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3287 self._retreat(index) 3288 return None 3289 3290 comments = self._prev_comments 3291 3292 if self._match_text_seq("NOT", "MATERIALIZED"): 3293 materialized = False 3294 elif self._match_text_seq("MATERIALIZED"): 3295 materialized = True 3296 else: 3297 materialized = None 3298 3299 cte = self.expression( 3300 exp.CTE, 3301 this=self._parse_wrapped(self._parse_statement), 3302 alias=alias, 3303 materialized=materialized, 3304 comments=comments, 3305 ) 3306 3307 if isinstance(cte.this, exp.Values): 3308 cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3309 3310 return 
cte 3311 3312 def _parse_table_alias( 3313 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3314 ) -> t.Optional[exp.TableAlias]: 3315 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3316 # so this section tries to parse the clause version and if it fails, it treats the token 3317 # as an identifier (alias) 3318 if self._can_parse_limit_or_offset(): 3319 return None 3320 3321 any_token = self._match(TokenType.ALIAS) 3322 alias = ( 3323 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3324 or self._parse_string_as_identifier() 3325 ) 3326 3327 index = self._index 3328 if self._match(TokenType.L_PAREN): 3329 columns = self._parse_csv(self._parse_function_parameter) 3330 self._match_r_paren() if columns else self._retreat(index) 3331 else: 3332 columns = None 3333 3334 if not alias and not columns: 3335 return None 3336 3337 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3338 3339 # We bubble up comments from the Identifier to the TableAlias 3340 if isinstance(alias, exp.Identifier): 3341 table_alias.add_comments(alias.pop_comments()) 3342 3343 return table_alias 3344 3345 def _parse_subquery( 3346 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3347 ) -> t.Optional[exp.Subquery]: 3348 if not this: 3349 return None 3350 3351 return self.expression( 3352 exp.Subquery, 3353 this=this, 3354 pivots=self._parse_pivots(), 3355 alias=self._parse_table_alias() if parse_alias else None, 3356 sample=self._parse_table_sample(), 3357 ) 3358 3359 def _implicit_unnests_to_explicit(self, this: E) -> E: 3360 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3361 3362 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3363 for i, join in enumerate(this.args.get("joins") or []): 3364 table = join.this 3365 normalized_table = table.copy() 3366 normalized_table.meta["maybe_column"] = True 3367 normalized_table = _norm(normalized_table, dialect=self.dialect) 3368 3369 if isinstance(table, exp.Table) and not join.args.get("on"): 3370 if normalized_table.parts[0].name in refs: 3371 table_as_column = table.to_column() 3372 unnest = exp.Unnest(expressions=[table_as_column]) 3373 3374 # Table.to_column creates a parent Alias node that we want to convert to 3375 # a TableAlias and attach to the Unnest, so it matches the parser's output 3376 if isinstance(table.args.get("alias"), exp.TableAlias): 3377 table_as_column.replace(table_as_column.this) 3378 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3379 3380 table.replace(unnest) 3381 3382 refs.add(normalized_table.alias_or_name) 3383 3384 return this 3385 3386 def _parse_query_modifiers( 3387 self, this: t.Optional[exp.Expression] 3388 ) -> t.Optional[exp.Expression]: 3389 if isinstance(this, self.MODIFIABLES): 3390 for join in self._parse_joins(): 3391 this.append("joins", join) 3392 for lateral in iter(self._parse_lateral, None): 3393 this.append("laterals", lateral) 3394 3395 while True: 3396 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3397 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3398 key, expression = parser(self) 3399 3400 if expression: 3401 this.set(key, expression) 3402 if key == "limit": 3403 offset = expression.args.pop("offset", None) 3404 3405 if offset: 3406 offset = exp.Offset(expression=offset) 3407 this.set("offset", offset) 3408 3409 limit_by_expressions = expression.expressions 3410 
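                        # The Limit node's auxiliary expressions (e.g. ClickHouse-style LIMIT ... BY)
                        # are transferred onto the newly created Offset node below.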
expression.set("expressions", None) 3411 offset.set("expressions", limit_by_expressions) 3412 continue 3413 break 3414 3415 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3416 this = self._implicit_unnests_to_explicit(this) 3417 3418 return this 3419 3420 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3421 start = self._curr 3422 while self._curr: 3423 self._advance() 3424 3425 end = self._tokens[self._index - 1] 3426 return exp.Hint(expressions=[self._find_sql(start, end)]) 3427 3428 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3429 return self._parse_function_call() 3430 3431 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3432 start_index = self._index 3433 should_fallback_to_string = False 3434 3435 hints = [] 3436 try: 3437 for hint in iter( 3438 lambda: self._parse_csv( 3439 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3440 ), 3441 [], 3442 ): 3443 hints.extend(hint) 3444 except ParseError: 3445 should_fallback_to_string = True 3446 3447 if should_fallback_to_string or self._curr: 3448 self._retreat(start_index) 3449 return self._parse_hint_fallback_to_string() 3450 3451 return self.expression(exp.Hint, expressions=hints) 3452 3453 def _parse_hint(self) -> t.Optional[exp.Hint]: 3454 if self._match(TokenType.HINT) and self._prev_comments: 3455 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3456 3457 return None 3458 3459 def _parse_into(self) -> t.Optional[exp.Into]: 3460 if not self._match(TokenType.INTO): 3461 return None 3462 3463 temp = self._match(TokenType.TEMPORARY) 3464 unlogged = self._match_text_seq("UNLOGGED") 3465 self._match(TokenType.TABLE) 3466 3467 return self.expression( 3468 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3469 ) 3470 3471 def _parse_from( 3472 self, joins: bool = False, skip_from_token: bool = False 3473 ) -> t.Optional[exp.From]: 3474 if not skip_from_token and not self._match(TokenType.FROM): 3475 return None 3476 3477 return self.expression( 3478 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3479 ) 3480 3481 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3482 return self.expression( 3483 exp.MatchRecognizeMeasure, 3484 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3485 this=self._parse_expression(), 3486 ) 3487 3488 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3489 if not self._match(TokenType.MATCH_RECOGNIZE): 3490 return None 3491 3492 self._match_l_paren() 3493 3494 partition = self._parse_partition_by() 3495 order = self._parse_order() 3496 3497 measures = ( 3498 self._parse_csv(self._parse_match_recognize_measure) 3499 if self._match_text_seq("MEASURES") 3500 else None 3501 ) 3502 3503 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3504 rows = exp.var("ONE ROW PER MATCH") 3505 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3506 text = "ALL ROWS PER MATCH" 3507 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3508 text += " SHOW EMPTY MATCHES" 3509 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3510 text += " OMIT EMPTY MATCHES" 3511 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3512 text += " WITH UNMATCHED ROWS" 3513 rows = exp.var(text) 3514 else: 3515 rows = None 3516 3517 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3518 text = "AFTER MATCH SKIP" 3519 if self._match_text_seq("PAST", "LAST", "ROW"): 3520 text += " PAST 
LAST ROW" 3521 elif self._match_text_seq("TO", "NEXT", "ROW"): 3522 text += " TO NEXT ROW" 3523 elif self._match_text_seq("TO", "FIRST"): 3524 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3525 elif self._match_text_seq("TO", "LAST"): 3526 text += f" TO LAST {self._advance_any().text}" # type: ignore 3527 after = exp.var(text) 3528 else: 3529 after = None 3530 3531 if self._match_text_seq("PATTERN"): 3532 self._match_l_paren() 3533 3534 if not self._curr: 3535 self.raise_error("Expecting )", self._curr) 3536 3537 paren = 1 3538 start = self._curr 3539 3540 while self._curr and paren > 0: 3541 if self._curr.token_type == TokenType.L_PAREN: 3542 paren += 1 3543 if self._curr.token_type == TokenType.R_PAREN: 3544 paren -= 1 3545 3546 end = self._prev 3547 self._advance() 3548 3549 if paren > 0: 3550 self.raise_error("Expecting )", self._curr) 3551 3552 pattern = exp.var(self._find_sql(start, end)) 3553 else: 3554 pattern = None 3555 3556 define = ( 3557 self._parse_csv(self._parse_name_as_expression) 3558 if self._match_text_seq("DEFINE") 3559 else None 3560 ) 3561 3562 self._match_r_paren() 3563 3564 return self.expression( 3565 exp.MatchRecognize, 3566 partition_by=partition, 3567 order=order, 3568 measures=measures, 3569 rows=rows, 3570 after=after, 3571 pattern=pattern, 3572 define=define, 3573 alias=self._parse_table_alias(), 3574 ) 3575 3576 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3577 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3578 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3579 cross_apply = False 3580 3581 if cross_apply is not None: 3582 this = self._parse_select(table=True) 3583 view = None 3584 outer = None 3585 elif self._match(TokenType.LATERAL): 3586 this = self._parse_select(table=True) 3587 view = self._match(TokenType.VIEW) 3588 outer = self._match(TokenType.OUTER) 3589 else: 3590 return None 3591 3592 if not this: 3593 this = ( 3594 self._parse_unnest() 3595 or self._parse_function() 3596 or self._parse_id_var(any_token=False) 3597 ) 3598 3599 while self._match(TokenType.DOT): 3600 this = exp.Dot( 3601 this=this, 3602 expression=self._parse_function() or self._parse_id_var(any_token=False), 3603 ) 3604 3605 ordinality: t.Optional[bool] = None 3606 3607 if view: 3608 table = self._parse_id_var(any_token=False) 3609 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3610 table_alias: t.Optional[exp.TableAlias] = self.expression( 3611 exp.TableAlias, this=table, columns=columns 3612 ) 3613 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3614 # We move the alias from the lateral's child node to the lateral itself 3615 table_alias = this.args["alias"].pop() 3616 else: 3617 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3618 table_alias = self._parse_table_alias() 3619 3620 return self.expression( 3621 exp.Lateral, 3622 this=this, 3623 view=view, 3624 outer=outer, 3625 alias=table_alias, 3626 cross_apply=cross_apply, 3627 ordinality=ordinality, 3628 ) 3629 3630 def _parse_join_parts( 3631 self, 3632 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3633 return ( 3634 self._match_set(self.JOIN_METHODS) and self._prev, 3635 self._match_set(self.JOIN_SIDES) and self._prev, 3636 self._match_set(self.JOIN_KINDS) and self._prev, 3637 ) 3638 3639 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3640 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3641 this = self._parse_column() 
3642 if isinstance(this, exp.Column): 3643 return this.this 3644 return this 3645 3646 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3647 3648 def _parse_join( 3649 self, skip_join_token: bool = False, parse_bracket: bool = False 3650 ) -> t.Optional[exp.Join]: 3651 if self._match(TokenType.COMMA): 3652 table = self._try_parse(self._parse_table) 3653 if table: 3654 return self.expression(exp.Join, this=table) 3655 return None 3656 3657 index = self._index 3658 method, side, kind = self._parse_join_parts() 3659 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3660 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3661 3662 if not skip_join_token and not join: 3663 self._retreat(index) 3664 kind = None 3665 method = None 3666 side = None 3667 3668 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3669 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3670 3671 if not skip_join_token and not join and not outer_apply and not cross_apply: 3672 return None 3673 3674 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3675 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3676 kwargs["expressions"] = self._parse_csv( 3677 lambda: self._parse_table(parse_bracket=parse_bracket) 3678 ) 3679 3680 if method: 3681 kwargs["method"] = method.text 3682 if side: 3683 kwargs["side"] = side.text 3684 if kind: 3685 kwargs["kind"] = kind.text 3686 if hint: 3687 kwargs["hint"] = hint 3688 3689 if self._match(TokenType.MATCH_CONDITION): 3690 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3691 3692 if self._match(TokenType.ON): 3693 kwargs["on"] = self._parse_assignment() 3694 elif self._match(TokenType.USING): 3695 kwargs["using"] = self._parse_using_identifiers() 3696 elif ( 3697 not (outer_apply or cross_apply) 3698 and not isinstance(kwargs["this"], exp.Unnest) 3699 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3700 ): 3701 index = self._index 3702 joins: t.Optional[list] = list(self._parse_joins()) 3703 3704 if joins and self._match(TokenType.ON): 3705 kwargs["on"] = self._parse_assignment() 3706 elif joins and self._match(TokenType.USING): 3707 kwargs["using"] = self._parse_using_identifiers() 3708 else: 3709 joins = None 3710 self._retreat(index) 3711 3712 kwargs["this"].set("joins", joins if joins else None) 3713 3714 comments = [c for token in (method, side, kind) if token for c in token.comments] 3715 return self.expression(exp.Join, comments=comments, **kwargs) 3716 3717 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3718 this = self._parse_assignment() 3719 3720 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3721 return this 3722 3723 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3724 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3725 3726 return this 3727 3728 def _parse_index_params(self) -> exp.IndexParameters: 3729 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3730 3731 if self._match(TokenType.L_PAREN, advance=False): 3732 columns = self._parse_wrapped_csv(self._parse_with_operator) 3733 else: 3734 columns = None 3735 3736 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3737 partition_by = self._parse_partition_by() 3738 with_storage = self._match(TokenType.WITH) and 
self._parse_wrapped_properties() 3739 tablespace = ( 3740 self._parse_var(any_token=True) 3741 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3742 else None 3743 ) 3744 where = self._parse_where() 3745 3746 on = self._parse_field() if self._match(TokenType.ON) else None 3747 3748 return self.expression( 3749 exp.IndexParameters, 3750 using=using, 3751 columns=columns, 3752 include=include, 3753 partition_by=partition_by, 3754 where=where, 3755 with_storage=with_storage, 3756 tablespace=tablespace, 3757 on=on, 3758 ) 3759 3760 def _parse_index( 3761 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3762 ) -> t.Optional[exp.Index]: 3763 if index or anonymous: 3764 unique = None 3765 primary = None 3766 amp = None 3767 3768 self._match(TokenType.ON) 3769 self._match(TokenType.TABLE) # hive 3770 table = self._parse_table_parts(schema=True) 3771 else: 3772 unique = self._match(TokenType.UNIQUE) 3773 primary = self._match_text_seq("PRIMARY") 3774 amp = self._match_text_seq("AMP") 3775 3776 if not self._match(TokenType.INDEX): 3777 return None 3778 3779 index = self._parse_id_var() 3780 table = None 3781 3782 params = self._parse_index_params() 3783 3784 return self.expression( 3785 exp.Index, 3786 this=index, 3787 table=table, 3788 unique=unique, 3789 primary=primary, 3790 amp=amp, 3791 params=params, 3792 ) 3793 3794 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3795 hints: t.List[exp.Expression] = [] 3796 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3797 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3798 hints.append( 3799 self.expression( 3800 exp.WithTableHint, 3801 expressions=self._parse_csv( 3802 lambda: self._parse_function() or self._parse_var(any_token=True) 3803 ), 3804 ) 3805 ) 3806 self._match_r_paren() 3807 else: 3808 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3809 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3810 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3811 3812 self._match_set((TokenType.INDEX, TokenType.KEY)) 3813 if self._match(TokenType.FOR): 3814 hint.set("target", self._advance_any() and self._prev.text.upper()) 3815 3816 hint.set("expressions", self._parse_wrapped_id_vars()) 3817 hints.append(hint) 3818 3819 return hints or None 3820 3821 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3822 return ( 3823 (not schema and self._parse_function(optional_parens=False)) 3824 or self._parse_id_var(any_token=False) 3825 or self._parse_string_as_identifier() 3826 or self._parse_placeholder() 3827 ) 3828 3829 def _parse_table_parts( 3830 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3831 ) -> exp.Table: 3832 catalog = None 3833 db = None 3834 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3835 3836 while self._match(TokenType.DOT): 3837 if catalog: 3838 # This allows nesting the table in arbitrarily many dot expressions if needed 3839 table = self.expression( 3840 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3841 ) 3842 else: 3843 catalog = db 3844 db = table 3845 # "" used for tsql FROM a..b case 3846 table = self._parse_table_part(schema=schema) or "" 3847 3848 if ( 3849 wildcard 3850 and self._is_connected() 3851 and (isinstance(table, exp.Identifier) or not table) 3852 and self._match(TokenType.STAR) 3853 ): 3854 if isinstance(table, exp.Identifier): 3855 table.args["this"] += "*" 3856 else: 3857 
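# Quick sketch of how dotted names end up on exp.Table after the dot-chaining above
# (public API; illustrative only):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> tbl = sqlglot.parse_one("SELECT * FROM some_catalog.some_db.some_table").find(exp.Table)
#     >>> assert (tbl.catalog, tbl.db, tbl.name) == ("some_catalog", "some_db", "some_table")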
table = exp.Identifier(this="*") 3858 3859 # We bubble up comments from the Identifier to the Table 3860 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3861 3862 if is_db_reference: 3863 catalog = db 3864 db = table 3865 table = None 3866 3867 if not table and not is_db_reference: 3868 self.raise_error(f"Expected table name but got {self._curr}") 3869 if not db and is_db_reference: 3870 self.raise_error(f"Expected database name but got {self._curr}") 3871 3872 table = self.expression( 3873 exp.Table, 3874 comments=comments, 3875 this=table, 3876 db=db, 3877 catalog=catalog, 3878 ) 3879 3880 changes = self._parse_changes() 3881 if changes: 3882 table.set("changes", changes) 3883 3884 at_before = self._parse_historical_data() 3885 if at_before: 3886 table.set("when", at_before) 3887 3888 pivots = self._parse_pivots() 3889 if pivots: 3890 table.set("pivots", pivots) 3891 3892 return table 3893 3894 def _parse_table( 3895 self, 3896 schema: bool = False, 3897 joins: bool = False, 3898 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3899 parse_bracket: bool = False, 3900 is_db_reference: bool = False, 3901 parse_partition: bool = False, 3902 ) -> t.Optional[exp.Expression]: 3903 lateral = self._parse_lateral() 3904 if lateral: 3905 return lateral 3906 3907 unnest = self._parse_unnest() 3908 if unnest: 3909 return unnest 3910 3911 values = self._parse_derived_table_values() 3912 if values: 3913 return values 3914 3915 subquery = self._parse_select(table=True) 3916 if subquery: 3917 if not subquery.args.get("pivots"): 3918 subquery.set("pivots", self._parse_pivots()) 3919 return subquery 3920 3921 bracket = parse_bracket and self._parse_bracket(None) 3922 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3923 3924 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3925 self._parse_table 3926 ) 3927 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3928 3929 only = self._match(TokenType.ONLY) 3930 3931 this = t.cast( 3932 exp.Expression, 3933 bracket 3934 or rows_from 3935 or self._parse_bracket( 3936 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3937 ), 3938 ) 3939 3940 if only: 3941 this.set("only", only) 3942 3943 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3944 self._match_text_seq("*") 3945 3946 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3947 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3948 this.set("partition", self._parse_partition()) 3949 3950 if schema: 3951 return self._parse_schema(this=this) 3952 3953 version = self._parse_version() 3954 3955 if version: 3956 this.set("version", version) 3957 3958 if self.dialect.ALIAS_POST_TABLESAMPLE: 3959 this.set("sample", self._parse_table_sample()) 3960 3961 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3962 if alias: 3963 this.set("alias", alias) 3964 3965 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3966 return self.expression( 3967 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3968 ) 3969 3970 this.set("hints", self._parse_table_hints()) 3971 3972 if not this.args.get("pivots"): 3973 this.set("pivots", self._parse_pivots()) 3974 3975 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3976 this.set("sample", self._parse_table_sample()) 3977 3978 if joins: 3979 for join in self._parse_joins(): 3980 
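# Sketch of the table-level wiring above (alias and sample placement) on the default
# dialect; treat the asserts as expectations rather than a spec:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> tbl = sqlglot.parse_one("SELECT * FROM db.t AS t1 TABLESAMPLE (10 ROWS)").find(exp.Table)
#     >>> assert tbl.alias == "t1" and tbl.db == "db"
#     >>> assert isinstance(tbl.args.get("sample"), exp.TableSample)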
this.append("joins", join) 3981 3982 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3983 this.set("ordinality", True) 3984 this.set("alias", self._parse_table_alias()) 3985 3986 return this 3987 3988 def _parse_version(self) -> t.Optional[exp.Version]: 3989 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3990 this = "TIMESTAMP" 3991 elif self._match(TokenType.VERSION_SNAPSHOT): 3992 this = "VERSION" 3993 else: 3994 return None 3995 3996 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3997 kind = self._prev.text.upper() 3998 start = self._parse_bitwise() 3999 self._match_texts(("TO", "AND")) 4000 end = self._parse_bitwise() 4001 expression: t.Optional[exp.Expression] = self.expression( 4002 exp.Tuple, expressions=[start, end] 4003 ) 4004 elif self._match_text_seq("CONTAINED", "IN"): 4005 kind = "CONTAINED IN" 4006 expression = self.expression( 4007 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4008 ) 4009 elif self._match(TokenType.ALL): 4010 kind = "ALL" 4011 expression = None 4012 else: 4013 self._match_text_seq("AS", "OF") 4014 kind = "AS OF" 4015 expression = self._parse_type() 4016 4017 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4018 4019 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4020 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4021 index = self._index 4022 historical_data = None 4023 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4024 this = self._prev.text.upper() 4025 kind = ( 4026 self._match(TokenType.L_PAREN) 4027 and self._match_texts(self.HISTORICAL_DATA_KIND) 4028 and self._prev.text.upper() 4029 ) 4030 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4031 4032 if expression: 4033 self._match_r_paren() 4034 historical_data = self.expression( 4035 exp.HistoricalData, this=this, kind=kind, expression=expression 4036 ) 4037 else: 4038 self._retreat(index) 4039 4040 return historical_data 4041 4042 def _parse_changes(self) -> t.Optional[exp.Changes]: 4043 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4044 return None 4045 4046 information = self._parse_var(any_token=True) 4047 self._match_r_paren() 4048 4049 return self.expression( 4050 exp.Changes, 4051 information=information, 4052 at_before=self._parse_historical_data(), 4053 end=self._parse_historical_data(), 4054 ) 4055 4056 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4057 if not self._match(TokenType.UNNEST): 4058 return None 4059 4060 expressions = self._parse_wrapped_csv(self._parse_equality) 4061 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4062 4063 alias = self._parse_table_alias() if with_alias else None 4064 4065 if alias: 4066 if self.dialect.UNNEST_COLUMN_ONLY: 4067 if alias.args.get("columns"): 4068 self.raise_error("Unexpected extra column alias in unnest.") 4069 4070 alias.set("columns", [alias.this]) 4071 alias.set("this", None) 4072 4073 columns = alias.args.get("columns") or [] 4074 if offset and len(expressions) < len(columns): 4075 offset = columns.pop() 4076 4077 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4078 self._match(TokenType.ALIAS) 4079 offset = self._parse_id_var( 4080 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4081 ) or exp.to_identifier("offset") 4082 4083 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4084 4085 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4086 is_derived = 
self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4087 if not is_derived and not ( 4088 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4089 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4090 ): 4091 return None 4092 4093 expressions = self._parse_csv(self._parse_value) 4094 alias = self._parse_table_alias() 4095 4096 if is_derived: 4097 self._match_r_paren() 4098 4099 return self.expression( 4100 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4101 ) 4102 4103 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4104 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4105 as_modifier and self._match_text_seq("USING", "SAMPLE") 4106 ): 4107 return None 4108 4109 bucket_numerator = None 4110 bucket_denominator = None 4111 bucket_field = None 4112 percent = None 4113 size = None 4114 seed = None 4115 4116 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4117 matched_l_paren = self._match(TokenType.L_PAREN) 4118 4119 if self.TABLESAMPLE_CSV: 4120 num = None 4121 expressions = self._parse_csv(self._parse_primary) 4122 else: 4123 expressions = None 4124 num = ( 4125 self._parse_factor() 4126 if self._match(TokenType.NUMBER, advance=False) 4127 else self._parse_primary() or self._parse_placeholder() 4128 ) 4129 4130 if self._match_text_seq("BUCKET"): 4131 bucket_numerator = self._parse_number() 4132 self._match_text_seq("OUT", "OF") 4133 bucket_denominator = bucket_denominator = self._parse_number() 4134 self._match(TokenType.ON) 4135 bucket_field = self._parse_field() 4136 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4137 percent = num 4138 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4139 size = num 4140 else: 4141 percent = num 4142 4143 if matched_l_paren: 4144 self._match_r_paren() 4145 4146 if self._match(TokenType.L_PAREN): 4147 method = self._parse_var(upper=True) 4148 seed = self._match(TokenType.COMMA) and self._parse_number() 4149 self._match_r_paren() 4150 elif self._match_texts(("SEED", "REPEATABLE")): 4151 seed = self._parse_wrapped(self._parse_number) 4152 4153 if not method and self.DEFAULT_SAMPLING_METHOD: 4154 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4155 4156 return self.expression( 4157 exp.TableSample, 4158 expressions=expressions, 4159 method=method, 4160 bucket_numerator=bucket_numerator, 4161 bucket_denominator=bucket_denominator, 4162 bucket_field=bucket_field, 4163 percent=percent, 4164 size=size, 4165 seed=seed, 4166 ) 4167 4168 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4169 return list(iter(self._parse_pivot, None)) or None 4170 4171 def _parse_joins(self) -> t.Iterator[exp.Join]: 4172 return iter(self._parse_join, None) 4173 4174 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4175 if not self._match(TokenType.INTO): 4176 return None 4177 4178 return self.expression( 4179 exp.UnpivotColumns, 4180 this=self._match_text_seq("NAME") and self._parse_column(), 4181 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4182 ) 4183 4184 # https://duckdb.org/docs/sql/statements/pivot 4185 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4186 def _parse_on() -> t.Optional[exp.Expression]: 4187 this = self._parse_bitwise() 4188 4189 if self._match(TokenType.IN): 4190 # PIVOT ... 
ON col IN (row_val1, row_val2) 4191 return self._parse_in(this) 4192 if self._match(TokenType.ALIAS, advance=False): 4193 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4194 return self._parse_alias(this) 4195 4196 return this 4197 4198 this = self._parse_table() 4199 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4200 into = self._parse_unpivot_columns() 4201 using = self._match(TokenType.USING) and self._parse_csv( 4202 lambda: self._parse_alias(self._parse_function()) 4203 ) 4204 group = self._parse_group() 4205 4206 return self.expression( 4207 exp.Pivot, 4208 this=this, 4209 expressions=expressions, 4210 using=using, 4211 group=group, 4212 unpivot=is_unpivot, 4213 into=into, 4214 ) 4215 4216 def _parse_pivot_in(self) -> exp.In: 4217 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4218 this = self._parse_select_or_expression() 4219 4220 self._match(TokenType.ALIAS) 4221 alias = self._parse_bitwise() 4222 if alias: 4223 if isinstance(alias, exp.Column) and not alias.db: 4224 alias = alias.this 4225 return self.expression(exp.PivotAlias, this=this, alias=alias) 4226 4227 return this 4228 4229 value = self._parse_column() 4230 4231 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4232 self.raise_error("Expecting IN (") 4233 4234 if self._match(TokenType.ANY): 4235 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4236 else: 4237 exprs = self._parse_csv(_parse_aliased_expression) 4238 4239 self._match_r_paren() 4240 return self.expression(exp.In, this=value, expressions=exprs) 4241 4242 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4243 index = self._index 4244 include_nulls = None 4245 4246 if self._match(TokenType.PIVOT): 4247 unpivot = False 4248 elif self._match(TokenType.UNPIVOT): 4249 unpivot = True 4250 4251 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4252 if self._match_text_seq("INCLUDE", "NULLS"): 4253 include_nulls = True 4254 elif self._match_text_seq("EXCLUDE", "NULLS"): 4255 include_nulls = False 4256 else: 4257 return None 4258 4259 expressions = [] 4260 4261 if not self._match(TokenType.L_PAREN): 4262 self._retreat(index) 4263 return None 4264 4265 if unpivot: 4266 expressions = self._parse_csv(self._parse_column) 4267 else: 4268 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4269 4270 if not expressions: 4271 self.raise_error("Failed to parse PIVOT's aggregation list") 4272 4273 if not self._match(TokenType.FOR): 4274 self.raise_error("Expecting FOR") 4275 4276 fields = [] 4277 while True: 4278 field = self._try_parse(self._parse_pivot_in) 4279 if not field: 4280 break 4281 fields.append(field) 4282 4283 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4284 self._parse_bitwise 4285 ) 4286 4287 group = self._parse_group() 4288 4289 self._match_r_paren() 4290 4291 pivot = self.expression( 4292 exp.Pivot, 4293 expressions=expressions, 4294 fields=fields, 4295 unpivot=unpivot, 4296 include_nulls=include_nulls, 4297 default_on_null=default_on_null, 4298 group=group, 4299 ) 4300 4301 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4302 pivot.set("alias", self._parse_table_alias()) 4303 4304 if not unpivot: 4305 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4306 4307 columns: t.List[exp.Expression] = [] 4308 all_fields = [] 4309 for pivot_field in pivot.fields: 4310 pivot_field_expressions = 
pivot_field.expressions 4311 4312 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4313 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4314 continue 4315 4316 all_fields.append( 4317 [ 4318 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4319 for fld in pivot_field_expressions 4320 ] 4321 ) 4322 4323 if all_fields: 4324 if names: 4325 all_fields.append(names) 4326 4327 # Generate all possible combinations of the pivot columns 4328 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4329 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4330 for fld_parts_tuple in itertools.product(*all_fields): 4331 fld_parts = list(fld_parts_tuple) 4332 4333 if names and self.PREFIXED_PIVOT_COLUMNS: 4334 # Move the "name" to the front of the list 4335 fld_parts.insert(0, fld_parts.pop(-1)) 4336 4337 columns.append(exp.to_identifier("_".join(fld_parts))) 4338 4339 pivot.set("columns", columns) 4340 4341 return pivot 4342 4343 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4344 return [agg.alias for agg in aggregations if agg.alias] 4345 4346 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4347 if not skip_where_token and not self._match(TokenType.PREWHERE): 4348 return None 4349 4350 return self.expression( 4351 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4352 ) 4353 4354 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4355 if not skip_where_token and not self._match(TokenType.WHERE): 4356 return None 4357 4358 return self.expression( 4359 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4360 ) 4361 4362 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4363 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4364 return None 4365 4366 elements: t.Dict[str, t.Any] = defaultdict(list) 4367 4368 if self._match(TokenType.ALL): 4369 elements["all"] = True 4370 elif self._match(TokenType.DISTINCT): 4371 elements["all"] = False 4372 4373 while True: 4374 index = self._index 4375 4376 elements["expressions"].extend( 4377 self._parse_csv( 4378 lambda: None 4379 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4380 else self._parse_assignment() 4381 ) 4382 ) 4383 4384 before_with_index = self._index 4385 with_prefix = self._match(TokenType.WITH) 4386 4387 if self._match(TokenType.ROLLUP): 4388 elements["rollup"].append( 4389 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4390 ) 4391 elif self._match(TokenType.CUBE): 4392 elements["cube"].append( 4393 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4394 ) 4395 elif self._match(TokenType.GROUPING_SETS): 4396 elements["grouping_sets"].append( 4397 self.expression( 4398 exp.GroupingSets, 4399 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4400 ) 4401 ) 4402 elif self._match_text_seq("TOTALS"): 4403 elements["totals"] = True # type: ignore 4404 4405 if before_with_index <= self._index <= before_with_index + 1: 4406 self._retreat(before_with_index) 4407 break 4408 4409 if index == self._index: 4410 break 4411 4412 return self.expression(exp.Group, **elements) # type: ignore 4413 4414 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4415 return self.expression( 4416 kind, expressions=[] if with_prefix else 
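# Illustrative check for the GROUP BY handling above, including the ROLLUP bucket
# (public API; default dialect):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> g = sqlglot.parse_one("SELECT a, b, SUM(c) FROM t GROUP BY a, b").args["group"]
#     >>> assert isinstance(g, exp.Group) and len(g.expressions) == 2
#     >>> r = sqlglot.parse_one("SELECT a, SUM(c) FROM t GROUP BY ROLLUP (a)").args["group"]
#     >>> assert isinstance(r.args["rollup"][0], exp.Rollup)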
self._parse_wrapped_csv(self._parse_column) 4417 ) 4418 4419 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4420 if self._match(TokenType.L_PAREN): 4421 grouping_set = self._parse_csv(self._parse_column) 4422 self._match_r_paren() 4423 return self.expression(exp.Tuple, expressions=grouping_set) 4424 4425 return self._parse_column() 4426 4427 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4428 if not skip_having_token and not self._match(TokenType.HAVING): 4429 return None 4430 return self.expression(exp.Having, this=self._parse_assignment()) 4431 4432 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4433 if not self._match(TokenType.QUALIFY): 4434 return None 4435 return self.expression(exp.Qualify, this=self._parse_assignment()) 4436 4437 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4438 if skip_start_token: 4439 start = None 4440 elif self._match(TokenType.START_WITH): 4441 start = self._parse_assignment() 4442 else: 4443 return None 4444 4445 self._match(TokenType.CONNECT_BY) 4446 nocycle = self._match_text_seq("NOCYCLE") 4447 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4448 exp.Prior, this=self._parse_bitwise() 4449 ) 4450 connect = self._parse_assignment() 4451 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4452 4453 if not start and self._match(TokenType.START_WITH): 4454 start = self._parse_assignment() 4455 4456 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4457 4458 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4459 this = self._parse_id_var(any_token=True) 4460 if self._match(TokenType.ALIAS): 4461 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4462 return this 4463 4464 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4465 if self._match_text_seq("INTERPOLATE"): 4466 return self._parse_wrapped_csv(self._parse_name_as_expression) 4467 return None 4468 4469 def _parse_order( 4470 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4471 ) -> t.Optional[exp.Expression]: 4472 siblings = None 4473 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4474 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4475 return this 4476 4477 siblings = True 4478 4479 return self.expression( 4480 exp.Order, 4481 this=this, 4482 expressions=self._parse_csv(self._parse_ordered), 4483 siblings=siblings, 4484 ) 4485 4486 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4487 if not self._match(token): 4488 return None 4489 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4490 4491 def _parse_ordered( 4492 self, parse_method: t.Optional[t.Callable] = None 4493 ) -> t.Optional[exp.Ordered]: 4494 this = parse_method() if parse_method else self._parse_assignment() 4495 if not this: 4496 return None 4497 4498 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4499 this = exp.var("ALL") 4500 4501 asc = self._match(TokenType.ASC) 4502 desc = self._match(TokenType.DESC) or (asc and False) 4503 4504 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4505 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4506 4507 nulls_first = is_nulls_first or False 4508 explicitly_null_ordered = is_nulls_first or is_nulls_last 4509 4510 if ( 4511 not explicitly_null_ordered 4512 and ( 4513 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4514 or (desc and 
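# Sketch of the ordering flags set above; implicit NULLS placement depends on the
# dialect's NULL_ORDERING, so only the explicit case is asserted here (public API):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> o = sqlglot.parse_one("SELECT a FROM t ORDER BY a DESC NULLS LAST").find(exp.Ordered)
#     >>> assert o.args.get("desc") is True and not o.args.get("nulls_first")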
self.dialect.NULL_ORDERING != "nulls_are_small") 4515 ) 4516 and self.dialect.NULL_ORDERING != "nulls_are_last" 4517 ): 4518 nulls_first = True 4519 4520 if self._match_text_seq("WITH", "FILL"): 4521 with_fill = self.expression( 4522 exp.WithFill, 4523 **{ # type: ignore 4524 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4525 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4526 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4527 "interpolate": self._parse_interpolate(), 4528 }, 4529 ) 4530 else: 4531 with_fill = None 4532 4533 return self.expression( 4534 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4535 ) 4536 4537 def _parse_limit_options(self) -> exp.LimitOptions: 4538 percent = self._match(TokenType.PERCENT) 4539 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4540 self._match_text_seq("ONLY") 4541 with_ties = self._match_text_seq("WITH", "TIES") 4542 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4543 4544 def _parse_limit( 4545 self, 4546 this: t.Optional[exp.Expression] = None, 4547 top: bool = False, 4548 skip_limit_token: bool = False, 4549 ) -> t.Optional[exp.Expression]: 4550 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4551 comments = self._prev_comments 4552 if top: 4553 limit_paren = self._match(TokenType.L_PAREN) 4554 expression = self._parse_term() if limit_paren else self._parse_number() 4555 4556 if limit_paren: 4557 self._match_r_paren() 4558 4559 limit_options = self._parse_limit_options() 4560 else: 4561 limit_options = None 4562 expression = self._parse_term() 4563 4564 if self._match(TokenType.COMMA): 4565 offset = expression 4566 expression = self._parse_term() 4567 else: 4568 offset = None 4569 4570 limit_exp = self.expression( 4571 exp.Limit, 4572 this=this, 4573 expression=expression, 4574 offset=offset, 4575 comments=comments, 4576 limit_options=limit_options, 4577 expressions=self._parse_limit_by(), 4578 ) 4579 4580 return limit_exp 4581 4582 if self._match(TokenType.FETCH): 4583 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4584 direction = self._prev.text.upper() if direction else "FIRST" 4585 4586 count = self._parse_field(tokens=self.FETCH_TOKENS) 4587 4588 return self.expression( 4589 exp.Fetch, 4590 direction=direction, 4591 count=count, 4592 limit_options=self._parse_limit_options(), 4593 ) 4594 4595 return this 4596 4597 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4598 if not self._match(TokenType.OFFSET): 4599 return this 4600 4601 count = self._parse_term() 4602 self._match_set((TokenType.ROW, TokenType.ROWS)) 4603 4604 return self.expression( 4605 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4606 ) 4607 4608 def _can_parse_limit_or_offset(self) -> bool: 4609 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4610 return False 4611 4612 index = self._index 4613 result = bool( 4614 self._try_parse(self._parse_limit, retreat=True) 4615 or self._try_parse(self._parse_offset, retreat=True) 4616 ) 4617 self._retreat(index) 4618 return result 4619 4620 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4621 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4622 4623 def _parse_locks(self) -> t.List[exp.Lock]: 4624 locks = [] 4625 while True: 4626 if self._match_text_seq("FOR", "UPDATE"): 4627 update = True 4628 elif 
self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4629 "LOCK", "IN", "SHARE", "MODE" 4630 ): 4631 update = False 4632 else: 4633 break 4634 4635 expressions = None 4636 if self._match_text_seq("OF"): 4637 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4638 4639 wait: t.Optional[bool | exp.Expression] = None 4640 if self._match_text_seq("NOWAIT"): 4641 wait = True 4642 elif self._match_text_seq("WAIT"): 4643 wait = self._parse_primary() 4644 elif self._match_text_seq("SKIP", "LOCKED"): 4645 wait = False 4646 4647 locks.append( 4648 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4649 ) 4650 4651 return locks 4652 4653 def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4654 start = self._index 4655 _, side_token, kind_token = self._parse_join_parts() 4656 4657 side = side_token.text if side_token else None 4658 kind = kind_token.text if kind_token else None 4659 4660 if not self._match_set(self.SET_OPERATIONS): 4661 self._retreat(start) 4662 return None 4663 4664 token_type = self._prev.token_type 4665 4666 if token_type == TokenType.UNION: 4667 operation: t.Type[exp.SetOperation] = exp.Union 4668 elif token_type == TokenType.EXCEPT: 4669 operation = exp.Except 4670 else: 4671 operation = exp.Intersect 4672 4673 comments = self._prev.comments 4674 4675 if self._match(TokenType.DISTINCT): 4676 distinct: t.Optional[bool] = True 4677 elif self._match(TokenType.ALL): 4678 distinct = False 4679 else: 4680 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4681 if distinct is None: 4682 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4683 4684 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4685 "STRICT", "CORRESPONDING" 4686 ) 4687 if self._match_text_seq("CORRESPONDING"): 4688 by_name = True 4689 if not side and not kind: 4690 kind = "INNER" 4691 4692 on_column_list = None 4693 if by_name and self._match_texts(("ON", "BY")): 4694 on_column_list = self._parse_wrapped_csv(self._parse_column) 4695 4696 expression = self._parse_select(nested=True, parse_set_operation=False) 4697 4698 return self.expression( 4699 operation, 4700 comments=comments, 4701 this=this, 4702 distinct=distinct, 4703 by_name=by_name, 4704 expression=expression, 4705 side=side, 4706 kind=kind, 4707 on=on_column_list, 4708 ) 4709 4710 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4711 while True: 4712 setop = self.parse_set_operation(this) 4713 if not setop: 4714 break 4715 this = setop 4716 4717 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4718 expression = this.expression 4719 4720 if expression: 4721 for arg in self.SET_OP_MODIFIERS: 4722 expr = expression.args.get(arg) 4723 if expr: 4724 this.set(arg, expr.pop()) 4725 4726 return this 4727 4728 def _parse_expression(self) -> t.Optional[exp.Expression]: 4729 return self._parse_alias(self._parse_assignment()) 4730 4731 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4732 this = self._parse_disjunction() 4733 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4734 # This allows us to parse <non-identifier token> := <expr> 4735 this = exp.column( 4736 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4737 ) 4738 4739 while self._match_set(self.ASSIGNMENT): 4740 if isinstance(this, exp.Column) and len(this.parts) == 1: 4741 this = this.this 4742 4743 this = self.expression( 4744 
self.ASSIGNMENT[self._prev.token_type], 4745 this=this, 4746 comments=self._prev_comments, 4747 expression=self._parse_assignment(), 4748 ) 4749 4750 return this 4751 4752 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4753 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4754 4755 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4756 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4757 4758 def _parse_equality(self) -> t.Optional[exp.Expression]: 4759 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4760 4761 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4762 return self._parse_tokens(self._parse_range, self.COMPARISON) 4763 4764 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4765 this = this or self._parse_bitwise() 4766 negate = self._match(TokenType.NOT) 4767 4768 if self._match_set(self.RANGE_PARSERS): 4769 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4770 if not expression: 4771 return this 4772 4773 this = expression 4774 elif self._match(TokenType.ISNULL): 4775 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4776 4777 # Postgres supports ISNULL and NOTNULL for conditions. 4778 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4779 if self._match(TokenType.NOTNULL): 4780 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4781 this = self.expression(exp.Not, this=this) 4782 4783 if negate: 4784 this = self._negate_range(this) 4785 4786 if self._match(TokenType.IS): 4787 this = self._parse_is(this) 4788 4789 return this 4790 4791 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4792 if not this: 4793 return this 4794 4795 return self.expression(exp.Not, this=this) 4796 4797 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4798 index = self._index - 1 4799 negate = self._match(TokenType.NOT) 4800 4801 if self._match_text_seq("DISTINCT", "FROM"): 4802 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4803 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4804 4805 if self._match(TokenType.JSON): 4806 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4807 4808 if self._match_text_seq("WITH"): 4809 _with = True 4810 elif self._match_text_seq("WITHOUT"): 4811 _with = False 4812 else: 4813 _with = None 4814 4815 unique = self._match(TokenType.UNIQUE) 4816 self._match_text_seq("KEYS") 4817 expression: t.Optional[exp.Expression] = self.expression( 4818 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4819 ) 4820 else: 4821 expression = self._parse_primary() or self._parse_null() 4822 if not expression: 4823 self._retreat(index) 4824 return None 4825 4826 this = self.expression(exp.Is, this=this, expression=expression) 4827 return self.expression(exp.Not, this=this) if negate else this 4828 4829 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4830 unnest = self._parse_unnest(with_alias=False) 4831 if unnest: 4832 this = self.expression(exp.In, this=this, unnest=unnest) 4833 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4834 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4835 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4836 4837 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4838 this = self.expression(exp.In, 
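# Minimal sketch of the predicate parsing above; bare expressions parse directly with
# parse_one on the default dialect (public API, expected shapes only):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> e = sqlglot.parse_one("x IS NOT NULL")
#     >>> assert isinstance(e, exp.Not) and isinstance(e.this, exp.Is)
#     >>> i = sqlglot.parse_one("a IN (SELECT b FROM t)")
#     >>> assert isinstance(i, exp.In) and isinstance(i.args.get("query"), exp.Subquery)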
this=this, query=expressions[0].subquery(copy=False)) 4839 else: 4840 this = self.expression(exp.In, this=this, expressions=expressions) 4841 4842 if matched_l_paren: 4843 self._match_r_paren(this) 4844 elif not self._match(TokenType.R_BRACKET, expression=this): 4845 self.raise_error("Expecting ]") 4846 else: 4847 this = self.expression(exp.In, this=this, field=self._parse_column()) 4848 4849 return this 4850 4851 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4852 low = self._parse_bitwise() 4853 self._match(TokenType.AND) 4854 high = self._parse_bitwise() 4855 return self.expression(exp.Between, this=this, low=low, high=high) 4856 4857 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4858 if not self._match(TokenType.ESCAPE): 4859 return this 4860 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4861 4862 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4863 index = self._index 4864 4865 if not self._match(TokenType.INTERVAL) and match_interval: 4866 return None 4867 4868 if self._match(TokenType.STRING, advance=False): 4869 this = self._parse_primary() 4870 else: 4871 this = self._parse_term() 4872 4873 if not this or ( 4874 isinstance(this, exp.Column) 4875 and not this.table 4876 and not this.this.quoted 4877 and this.name.upper() == "IS" 4878 ): 4879 self._retreat(index) 4880 return None 4881 4882 unit = self._parse_function() or ( 4883 not self._match(TokenType.ALIAS, advance=False) 4884 and self._parse_var(any_token=True, upper=True) 4885 ) 4886 4887 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4888 # each INTERVAL expression into this canonical form so it's easy to transpile 4889 if this and this.is_number: 4890 this = exp.Literal.string(this.to_py()) 4891 elif this and this.is_string: 4892 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4893 if parts and unit: 4894 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4895 unit = None 4896 self._retreat(self._index - 1) 4897 4898 if len(parts) == 1: 4899 this = exp.Literal.string(parts[0][0]) 4900 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4901 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4902 unit = self.expression( 4903 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4904 ) 4905 4906 interval = self.expression(exp.Interval, this=this, unit=unit) 4907 4908 index = self._index 4909 self._match(TokenType.PLUS) 4910 4911 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4912 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4913 return self.expression( 4914 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4915 ) 4916 4917 self._retreat(index) 4918 return interval 4919 4920 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4921 this = self._parse_term() 4922 4923 while True: 4924 if self._match_set(self.BITWISE): 4925 this = self.expression( 4926 self.BITWISE[self._prev.token_type], 4927 this=this, 4928 expression=self._parse_term(), 4929 ) 4930 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4931 this = self.expression( 4932 exp.DPipe, 4933 this=this, 4934 expression=self._parse_term(), 4935 safe=not self.dialect.STRICT_STRING_CONCAT, 4936 ) 4937 elif self._match(TokenType.DQMARK): 4938 this = self.expression( 4939 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4940 ) 4941 elif self._match_pair(TokenType.LT, TokenType.LT): 4942 this = self.expression( 4943 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4944 ) 4945 elif self._match_pair(TokenType.GT, TokenType.GT): 4946 this = self.expression( 4947 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4948 ) 4949 else: 4950 break 4951 4952 return this 4953 4954 def _parse_term(self) -> t.Optional[exp.Expression]: 4955 this = self._parse_factor() 4956 4957 while self._match_set(self.TERM): 4958 klass = self.TERM[self._prev.token_type] 4959 comments = self._prev_comments 4960 expression = self._parse_factor() 4961 4962 this = self.expression(klass, this=this, comments=comments, expression=expression) 4963 4964 if isinstance(this, exp.Collate): 4965 expr = this.expression 4966 4967 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4968 # fallback to Identifier / Var 4969 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4970 ident = expr.this 4971 if isinstance(ident, exp.Identifier): 4972 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4973 4974 return this 4975 4976 def _parse_factor(self) -> t.Optional[exp.Expression]: 4977 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4978 this = parse_method() 4979 4980 while self._match_set(self.FACTOR): 4981 klass = self.FACTOR[self._prev.token_type] 4982 comments = self._prev_comments 4983 expression = parse_method() 4984 4985 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4986 self._retreat(self._index - 1) 4987 return this 4988 4989 this = self.expression(klass, this=this, comments=comments, expression=expression) 4990 4991 if isinstance(this, exp.Div): 4992 this.args["typed"] = self.dialect.TYPED_DIVISION 4993 this.args["safe"] = self.dialect.SAFE_DIVISION 4994 4995 return this 4996 4997 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4998 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4999 5000 def _parse_unary(self) -> t.Optional[exp.Expression]: 5001 if self._match_set(self.UNARY_PARSERS): 5002 return self.UNARY_PARSERS[self._prev.token_type](self) 5003 return self._parse_at_time_zone(self._parse_type()) 5004 5005 def _parse_type( 5006 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5007 ) -> t.Optional[exp.Expression]: 5008 interval = parse_interval and self._parse_interval() 5009 if interval: 5010 return interval 5011 5012 index = self._index 5013 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5014 
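# Sketch of the interval canonicalization and concat handling above (public API, default
# dialect); the literal/unit split mirrors the INTERVAL '5' DAY canonical form:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> iv = sqlglot.parse_one("SELECT INTERVAL '5 days'").find(exp.Interval)
#     >>> assert iv.this.name == "5" and iv.args["unit"].name == "DAYS"
#     >>> assert isinstance(sqlglot.parse_one("'a' || 'b'"), exp.DPipe)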
5015 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5016 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5017 if isinstance(data_type, exp.Cast): 5018 # This constructor can contain ops directly after it, for instance struct unnesting: 5019 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 5020 return self._parse_column_ops(data_type) 5021 5022 if data_type: 5023 index2 = self._index 5024 this = self._parse_primary() 5025 5026 if isinstance(this, exp.Literal): 5027 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5028 if parser: 5029 return parser(self, this, data_type) 5030 5031 return self.expression(exp.Cast, this=this, to=data_type) 5032 5033 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5034 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5035 # 5036 # If the index difference here is greater than 1, that means the parser itself must have 5037 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5038 # 5039 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5040 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5041 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5042 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 5043 # 5044 # In these cases, we don't really want to return the converted type, but instead retreat 5045 # and try to parse a Column or Identifier in the section below. 5046 if data_type.expressions and index2 - index > 1: 5047 self._retreat(index2) 5048 return self._parse_column_ops(data_type) 5049 5050 self._retreat(index) 5051 5052 if fallback_to_identifier: 5053 return self._parse_id_var() 5054 5055 this = self._parse_column() 5056 return this and self._parse_column_ops(this) 5057 5058 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5059 this = self._parse_type() 5060 if not this: 5061 return None 5062 5063 if isinstance(this, exp.Column) and not this.table: 5064 this = exp.var(this.name.upper()) 5065 5066 return self.expression( 5067 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5068 ) 5069 5070 def _parse_types( 5071 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5072 ) -> t.Optional[exp.Expression]: 5073 index = self._index 5074 5075 this: t.Optional[exp.Expression] = None 5076 prefix = self._match_text_seq("SYSUDTLIB", ".") 5077 5078 if not self._match_set(self.TYPE_TOKENS): 5079 identifier = allow_identifiers and self._parse_id_var( 5080 any_token=False, tokens=(TokenType.VAR,) 5081 ) 5082 if isinstance(identifier, exp.Identifier): 5083 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5084 5085 if len(tokens) != 1: 5086 self.raise_error("Unexpected identifier", self._prev) 5087 5088 if tokens[0].token_type in self.TYPE_TOKENS: 5089 self._prev = tokens[0] 5090 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5091 type_name = identifier.name 5092 5093 while self._match(TokenType.DOT): 5094 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5095 5096 this = exp.DataType.build(type_name, udt=True) 5097 else: 5098 self._retreat(self._index - 1) 5099 return None 5100 else: 5101 return None 5102 5103 type_token = self._prev.token_type 5104 5105 if type_token == TokenType.PSEUDO_TYPE: 
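# Illustrative check for the parameterized-type path discussed above (public API):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> c = sqlglot.parse_one("SELECT CAST(x AS DECIMAL(38, 0))").find(exp.Cast)
#     >>> assert c.to.is_type("decimal") and len(c.to.expressions) == 2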
5106 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5107 5108 if type_token == TokenType.OBJECT_IDENTIFIER: 5109 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5110 5111 # https://materialize.com/docs/sql/types/map/ 5112 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5113 key_type = self._parse_types( 5114 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5115 ) 5116 if not self._match(TokenType.FARROW): 5117 self._retreat(index) 5118 return None 5119 5120 value_type = self._parse_types( 5121 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5122 ) 5123 if not self._match(TokenType.R_BRACKET): 5124 self._retreat(index) 5125 return None 5126 5127 return exp.DataType( 5128 this=exp.DataType.Type.MAP, 5129 expressions=[key_type, value_type], 5130 nested=True, 5131 prefix=prefix, 5132 ) 5133 5134 nested = type_token in self.NESTED_TYPE_TOKENS 5135 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5136 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5137 expressions = None 5138 maybe_func = False 5139 5140 if self._match(TokenType.L_PAREN): 5141 if is_struct: 5142 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5143 elif nested: 5144 expressions = self._parse_csv( 5145 lambda: self._parse_types( 5146 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5147 ) 5148 ) 5149 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5150 this = expressions[0] 5151 this.set("nullable", True) 5152 self._match_r_paren() 5153 return this 5154 elif type_token in self.ENUM_TYPE_TOKENS: 5155 expressions = self._parse_csv(self._parse_equality) 5156 elif is_aggregate: 5157 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5158 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5159 ) 5160 if not func_or_ident: 5161 return None 5162 expressions = [func_or_ident] 5163 if self._match(TokenType.COMMA): 5164 expressions.extend( 5165 self._parse_csv( 5166 lambda: self._parse_types( 5167 check_func=check_func, 5168 schema=schema, 5169 allow_identifiers=allow_identifiers, 5170 ) 5171 ) 5172 ) 5173 else: 5174 expressions = self._parse_csv(self._parse_type_size) 5175 5176 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5177 if type_token == TokenType.VECTOR and len(expressions) == 2: 5178 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5179 5180 if not expressions or not self._match(TokenType.R_PAREN): 5181 self._retreat(index) 5182 return None 5183 5184 maybe_func = True 5185 5186 values: t.Optional[t.List[exp.Expression]] = None 5187 5188 if nested and self._match(TokenType.LT): 5189 if is_struct: 5190 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5191 else: 5192 expressions = self._parse_csv( 5193 lambda: self._parse_types( 5194 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5195 ) 5196 ) 5197 5198 if not self._match(TokenType.GT): 5199 self.raise_error("Expecting >") 5200 5201 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5202 values = self._parse_csv(self._parse_assignment) 5203 if not values and is_struct: 5204 values = None 5205 self._retreat(self._index - 1) 5206 else: 5207 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5208 5209 if type_token in self.TIMESTAMPS: 5210 if self._match_text_seq("WITH", "TIME", "ZONE"): 5211 maybe_func = False 5212 tz_type = ( 
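# Sketch for the nested-type and WITH TIME ZONE handling above; the ARRAY<INT> form
# assumes the default dialect accepts the <...> syntax (public API, expected shapes):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> dt = exp.DataType.build("ARRAY<INT>")
#     >>> assert dt.this == exp.DataType.Type.ARRAY and dt.expressions[0].is_type("int")
#     >>> tz = sqlglot.parse_one("SELECT CAST(x AS TIMESTAMP WITH TIME ZONE)").find(exp.DataType)
#     >>> assert tz.this == exp.DataType.Type.TIMESTAMPTZ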
5213 exp.DataType.Type.TIMETZ 5214 if type_token in self.TIMES 5215 else exp.DataType.Type.TIMESTAMPTZ 5216 ) 5217 this = exp.DataType(this=tz_type, expressions=expressions) 5218 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5219 maybe_func = False 5220 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5221 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5222 maybe_func = False 5223 elif type_token == TokenType.INTERVAL: 5224 unit = self._parse_var(upper=True) 5225 if unit: 5226 if self._match_text_seq("TO"): 5227 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5228 5229 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5230 else: 5231 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5232 5233 if maybe_func and check_func: 5234 index2 = self._index 5235 peek = self._parse_string() 5236 5237 if not peek: 5238 self._retreat(index) 5239 return None 5240 5241 self._retreat(index2) 5242 5243 if not this: 5244 if self._match_text_seq("UNSIGNED"): 5245 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5246 if not unsigned_type_token: 5247 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5248 5249 type_token = unsigned_type_token or type_token 5250 5251 this = exp.DataType( 5252 this=exp.DataType.Type[type_token.value], 5253 expressions=expressions, 5254 nested=nested, 5255 prefix=prefix, 5256 ) 5257 5258 # Empty arrays/structs are allowed 5259 if values is not None: 5260 cls = exp.Struct if is_struct else exp.Array 5261 this = exp.cast(cls(expressions=values), this, copy=False) 5262 5263 elif expressions: 5264 this.set("expressions", expressions) 5265 5266 # https://materialize.com/docs/sql/types/list/#type-name 5267 while self._match(TokenType.LIST): 5268 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5269 5270 index = self._index 5271 5272 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5273 matched_array = self._match(TokenType.ARRAY) 5274 5275 while self._curr: 5276 datatype_token = self._prev.token_type 5277 matched_l_bracket = self._match(TokenType.L_BRACKET) 5278 5279 if (not matched_l_bracket and not matched_array) or ( 5280 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5281 ): 5282 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5283 # not to be confused with the fixed size array parsing 5284 break 5285 5286 matched_array = False 5287 values = self._parse_csv(self._parse_assignment) or None 5288 if ( 5289 values 5290 and not schema 5291 and ( 5292 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5293 ) 5294 ): 5295 # Retreating here means that we should not parse the following values as part of the data type, e.g. 
in DuckDB 5296 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5297 self._retreat(index) 5298 break 5299 5300 this = exp.DataType( 5301 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5302 ) 5303 self._match(TokenType.R_BRACKET) 5304 5305 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5306 converter = self.TYPE_CONVERTERS.get(this.this) 5307 if converter: 5308 this = converter(t.cast(exp.DataType, this)) 5309 5310 return this 5311 5312 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5313 index = self._index 5314 5315 if ( 5316 self._curr 5317 and self._next 5318 and self._curr.token_type in self.TYPE_TOKENS 5319 and self._next.token_type in self.TYPE_TOKENS 5320 ): 5321 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5322 # type token. Without this, the list will be parsed as a type and we'll eventually crash 5323 this = self._parse_id_var() 5324 else: 5325 this = ( 5326 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5327 or self._parse_id_var() 5328 ) 5329 5330 self._match(TokenType.COLON) 5331 5332 if ( 5333 type_required 5334 and not isinstance(this, exp.DataType) 5335 and not self._match_set(self.TYPE_TOKENS, advance=False) 5336 ): 5337 self._retreat(index) 5338 return self._parse_types() 5339 5340 return self._parse_column_def(this) 5341 5342 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5343 if not self._match_text_seq("AT", "TIME", "ZONE"): 5344 return this 5345 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5346 5347 def _parse_column(self) -> t.Optional[exp.Expression]: 5348 this = self._parse_column_reference() 5349 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5350 5351 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5352 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5353 5354 return column 5355 5356 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5357 this = self._parse_field() 5358 if ( 5359 not this 5360 and self._match(TokenType.VALUES, advance=False) 5361 and self.VALUES_FOLLOWED_BY_PAREN 5362 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5363 ): 5364 this = self._parse_id_var() 5365 5366 if isinstance(this, exp.Identifier): 5367 # We bubble up comments from the Identifier to the Column 5368 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5369 5370 return this 5371 5372 def _parse_colon_as_variant_extract( 5373 self, this: t.Optional[exp.Expression] 5374 ) -> t.Optional[exp.Expression]: 5375 casts = [] 5376 json_path = [] 5377 escape = None 5378 5379 while self._match(TokenType.COLON): 5380 start_index = self._index 5381 5382 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5383 path = self._parse_column_ops( 5384 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5385 ) 5386 5387 # The cast :: operator has a lower precedence than the extraction operator :, so 5388 # we rearrange the AST appropriately to avoid casting the JSON path 5389 while isinstance(path, exp.Cast): 5390 casts.append(path.to) 5391 path = path.this 5392 5393 if casts: 5394 dcolon_offset = next( 5395 i 5396 for i, t in enumerate(self._tokens[start_index:]) 5397 if t.token_type == TokenType.DCOLON 
5398 ) 5399 end_token = self._tokens[start_index + dcolon_offset - 1] 5400 else: 5401 end_token = self._prev 5402 5403 if path: 5404 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5405 # it'll roundtrip to a string literal in GET_PATH 5406 if isinstance(path, exp.Identifier) and path.quoted: 5407 escape = True 5408 5409 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5410 5411 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5412 # Databricks transforms it back to the colon/dot notation 5413 if json_path: 5414 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5415 5416 if json_path_expr: 5417 json_path_expr.set("escape", escape) 5418 5419 this = self.expression( 5420 exp.JSONExtract, 5421 this=this, 5422 expression=json_path_expr, 5423 variant_extract=True, 5424 ) 5425 5426 while casts: 5427 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5428 5429 return this 5430 5431 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5432 return self._parse_types() 5433 5434 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5435 this = self._parse_bracket(this) 5436 5437 while self._match_set(self.COLUMN_OPERATORS): 5438 op_token = self._prev.token_type 5439 op = self.COLUMN_OPERATORS.get(op_token) 5440 5441 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5442 field = self._parse_dcolon() 5443 if not field: 5444 self.raise_error("Expected type") 5445 elif op and self._curr: 5446 field = self._parse_column_reference() or self._parse_bracket() 5447 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5448 field = self._parse_column_ops(field) 5449 else: 5450 field = self._parse_field(any_token=True, anonymous_func=True) 5451 5452 if isinstance(field, (exp.Func, exp.Window)) and this: 5453 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) 
etc 5454 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5455 this = exp.replace_tree( 5456 this, 5457 lambda n: ( 5458 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5459 if n.table 5460 else n.this 5461 ) 5462 if isinstance(n, exp.Column) 5463 else n, 5464 ) 5465 5466 if op: 5467 this = op(self, this, field) 5468 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5469 this = self.expression( 5470 exp.Column, 5471 comments=this.comments, 5472 this=field, 5473 table=this.this, 5474 db=this.args.get("table"), 5475 catalog=this.args.get("db"), 5476 ) 5477 elif isinstance(field, exp.Window): 5478 # Move the exp.Dot's to the window's function 5479 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5480 field.set("this", window_func) 5481 this = field 5482 else: 5483 this = self.expression(exp.Dot, this=this, expression=field) 5484 5485 if field and field.comments: 5486 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5487 5488 this = self._parse_bracket(this) 5489 5490 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5491 5492 def _parse_primary(self) -> t.Optional[exp.Expression]: 5493 if self._match_set(self.PRIMARY_PARSERS): 5494 token_type = self._prev.token_type 5495 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5496 5497 if token_type == TokenType.STRING: 5498 expressions = [primary] 5499 while self._match(TokenType.STRING): 5500 expressions.append(exp.Literal.string(self._prev.text)) 5501 5502 if len(expressions) > 1: 5503 return self.expression(exp.Concat, expressions=expressions) 5504 5505 return primary 5506 5507 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5508 return exp.Literal.number(f"0.{self._prev.text}") 5509 5510 if self._match(TokenType.L_PAREN): 5511 comments = self._prev_comments 5512 query = self._parse_select() 5513 5514 if query: 5515 expressions = [query] 5516 else: 5517 expressions = self._parse_expressions() 5518 5519 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5520 5521 if not this and self._match(TokenType.R_PAREN, advance=False): 5522 this = self.expression(exp.Tuple) 5523 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5524 this = self._parse_subquery(this=this, parse_alias=False) 5525 elif isinstance(this, exp.Subquery): 5526 this = self._parse_subquery( 5527 this=self._parse_set_operations(this), parse_alias=False 5528 ) 5529 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5530 this = self.expression(exp.Tuple, expressions=expressions) 5531 else: 5532 this = self.expression(exp.Paren, this=this) 5533 5534 if this: 5535 this.add_comments(comments) 5536 5537 self._match_r_paren(expression=this) 5538 return this 5539 5540 return None 5541 5542 def _parse_field( 5543 self, 5544 any_token: bool = False, 5545 tokens: t.Optional[t.Collection[TokenType]] = None, 5546 anonymous_func: bool = False, 5547 ) -> t.Optional[exp.Expression]: 5548 if anonymous_func: 5549 field = ( 5550 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5551 or self._parse_primary() 5552 ) 5553 else: 5554 field = self._parse_primary() or self._parse_function( 5555 anonymous=anonymous_func, any_token=any_token 5556 ) 5557 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5558 5559 def _parse_function( 5560 self, 5561 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5562 anonymous: bool = False, 5563 optional_parens: 
bool = True, 5564 any_token: bool = False, 5565 ) -> t.Optional[exp.Expression]: 5566 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5567 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5568 fn_syntax = False 5569 if ( 5570 self._match(TokenType.L_BRACE, advance=False) 5571 and self._next 5572 and self._next.text.upper() == "FN" 5573 ): 5574 self._advance(2) 5575 fn_syntax = True 5576 5577 func = self._parse_function_call( 5578 functions=functions, 5579 anonymous=anonymous, 5580 optional_parens=optional_parens, 5581 any_token=any_token, 5582 ) 5583 5584 if fn_syntax: 5585 self._match(TokenType.R_BRACE) 5586 5587 return func 5588 5589 def _parse_function_call( 5590 self, 5591 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5592 anonymous: bool = False, 5593 optional_parens: bool = True, 5594 any_token: bool = False, 5595 ) -> t.Optional[exp.Expression]: 5596 if not self._curr: 5597 return None 5598 5599 comments = self._curr.comments 5600 token_type = self._curr.token_type 5601 this = self._curr.text 5602 upper = this.upper() 5603 5604 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5605 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5606 self._advance() 5607 return self._parse_window(parser(self)) 5608 5609 if not self._next or self._next.token_type != TokenType.L_PAREN: 5610 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5611 self._advance() 5612 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5613 5614 return None 5615 5616 if any_token: 5617 if token_type in self.RESERVED_TOKENS: 5618 return None 5619 elif token_type not in self.FUNC_TOKENS: 5620 return None 5621 5622 self._advance(2) 5623 5624 parser = self.FUNCTION_PARSERS.get(upper) 5625 if parser and not anonymous: 5626 this = parser(self) 5627 else: 5628 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5629 5630 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5631 this = self.expression( 5632 subquery_predicate, comments=comments, this=self._parse_select() 5633 ) 5634 self._match_r_paren() 5635 return this 5636 5637 if functions is None: 5638 functions = self.FUNCTIONS 5639 5640 function = functions.get(upper) 5641 known_function = function and not anonymous 5642 5643 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5644 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5645 5646 post_func_comments = self._curr and self._curr.comments 5647 if known_function and post_func_comments: 5648 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5649 # call we'll construct it as exp.Anonymous, even if it's "known" 5650 if any( 5651 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5652 for comment in post_func_comments 5653 ): 5654 known_function = False 5655 5656 if alias and known_function: 5657 args = self._kv_to_prop_eq(args) 5658 5659 if known_function: 5660 func_builder = t.cast(t.Callable, function) 5661 5662 if "dialect" in func_builder.__code__.co_varnames: 5663 func = func_builder(args, dialect=self.dialect) 5664 else: 5665 func = func_builder(args) 5666 5667 func = self.validate_expression(func, args) 5668 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5669 func.meta["name"] = this 5670 5671 this = func 5672 else: 5673 if token_type == TokenType.IDENTIFIER: 5674 this = exp.Identifier(this=this, quoted=True) 5675 this = self.expression(exp.Anonymous, this=this, expressions=args) 5676 
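# [Editor's note - illustrative sketch, not part of the upstream source] The branch above is
# what decides between "known" and anonymous functions: names registered in self.FUNCTIONS are
# built by their registered builders into typed expression nodes, anything else falls back to
# exp.Anonymous, and a trailing /* sqlglot.anonymous */ comment forces the anonymous path even
# for known names. Assuming only the public API:
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> sqlglot.parse_one("SELECT SUM(x)").find(exp.Sum) is not None
#     True
#     >>> sqlglot.parse_one("SELECT MY_UDF(x)").find(exp.Anonymous) is not None
#     True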
5677 if isinstance(this, exp.Expression): 5678 this.add_comments(comments) 5679 5680 self._match_r_paren(this) 5681 return self._parse_window(this) 5682 5683 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5684 return expression 5685 5686 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5687 transformed = [] 5688 5689 for index, e in enumerate(expressions): 5690 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5691 if isinstance(e, exp.Alias): 5692 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5693 5694 if not isinstance(e, exp.PropertyEQ): 5695 e = self.expression( 5696 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5697 ) 5698 5699 if isinstance(e.this, exp.Column): 5700 e.this.replace(e.this.this) 5701 else: 5702 e = self._to_prop_eq(e, index) 5703 5704 transformed.append(e) 5705 5706 return transformed 5707 5708 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5709 return self._parse_statement() 5710 5711 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5712 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5713 5714 def _parse_user_defined_function( 5715 self, kind: t.Optional[TokenType] = None 5716 ) -> t.Optional[exp.Expression]: 5717 this = self._parse_table_parts(schema=True) 5718 5719 if not self._match(TokenType.L_PAREN): 5720 return this 5721 5722 expressions = self._parse_csv(self._parse_function_parameter) 5723 self._match_r_paren() 5724 return self.expression( 5725 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5726 ) 5727 5728 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5729 literal = self._parse_primary() 5730 if literal: 5731 return self.expression(exp.Introducer, this=token.text, expression=literal) 5732 5733 return self.expression(exp.Identifier, this=token.text) 5734 5735 def _parse_session_parameter(self) -> exp.SessionParameter: 5736 kind = None 5737 this = self._parse_id_var() or self._parse_primary() 5738 5739 if this and self._match(TokenType.DOT): 5740 kind = this.name 5741 this = self._parse_var() or self._parse_primary() 5742 5743 return self.expression(exp.SessionParameter, this=this, kind=kind) 5744 5745 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5746 return self._parse_id_var() 5747 5748 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5749 index = self._index 5750 5751 if self._match(TokenType.L_PAREN): 5752 expressions = t.cast( 5753 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5754 ) 5755 5756 if not self._match(TokenType.R_PAREN): 5757 self._retreat(index) 5758 else: 5759 expressions = [self._parse_lambda_arg()] 5760 5761 if self._match_set(self.LAMBDAS): 5762 return self.LAMBDAS[self._prev.token_type](self, expressions) 5763 5764 self._retreat(index) 5765 5766 this: t.Optional[exp.Expression] 5767 5768 if self._match(TokenType.DISTINCT): 5769 this = self.expression( 5770 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5771 ) 5772 else: 5773 this = self._parse_select_or_expression(alias=alias) 5774 5775 return self._parse_limit( 5776 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5777 ) 5778 5779 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5780 index = self._index 5781 if not 
self._match(TokenType.L_PAREN): 5782 return this 5783 5784 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5785 # expr can be of both types 5786 if self._match_set(self.SELECT_START_TOKENS): 5787 self._retreat(index) 5788 return this 5789 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5790 self._match_r_paren() 5791 return self.expression(exp.Schema, this=this, expressions=args) 5792 5793 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5794 return self._parse_column_def(self._parse_field(any_token=True)) 5795 5796 def _parse_column_def( 5797 self, this: t.Optional[exp.Expression], computed_column: bool = True 5798 ) -> t.Optional[exp.Expression]: 5799 # column defs are not really columns, they're identifiers 5800 if isinstance(this, exp.Column): 5801 this = this.this 5802 5803 if not computed_column: 5804 self._match(TokenType.ALIAS) 5805 5806 kind = self._parse_types(schema=True) 5807 5808 if self._match_text_seq("FOR", "ORDINALITY"): 5809 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5810 5811 constraints: t.List[exp.Expression] = [] 5812 5813 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5814 ("ALIAS", "MATERIALIZED") 5815 ): 5816 persisted = self._prev.text.upper() == "MATERIALIZED" 5817 constraint_kind = exp.ComputedColumnConstraint( 5818 this=self._parse_assignment(), 5819 persisted=persisted or self._match_text_seq("PERSISTED"), 5820 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5821 ) 5822 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5823 elif ( 5824 kind 5825 and self._match(TokenType.ALIAS, advance=False) 5826 and ( 5827 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5828 or (self._next and self._next.token_type == TokenType.L_PAREN) 5829 ) 5830 ): 5831 self._advance() 5832 constraints.append( 5833 self.expression( 5834 exp.ColumnConstraint, 5835 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5836 ) 5837 ) 5838 5839 while True: 5840 constraint = self._parse_column_constraint() 5841 if not constraint: 5842 break 5843 constraints.append(constraint) 5844 5845 if not kind and not constraints: 5846 return this 5847 5848 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5849 5850 def _parse_auto_increment( 5851 self, 5852 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5853 start = None 5854 increment = None 5855 5856 if self._match(TokenType.L_PAREN, advance=False): 5857 args = self._parse_wrapped_csv(self._parse_bitwise) 5858 start = seq_get(args, 0) 5859 increment = seq_get(args, 1) 5860 elif self._match_text_seq("START"): 5861 start = self._parse_bitwise() 5862 self._match_text_seq("INCREMENT") 5863 increment = self._parse_bitwise() 5864 5865 if start and increment: 5866 return exp.GeneratedAsIdentityColumnConstraint( 5867 start=start, increment=increment, this=False 5868 ) 5869 5870 return exp.AutoIncrementColumnConstraint() 5871 5872 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5873 if not self._match_text_seq("REFRESH"): 5874 self._retreat(self._index - 1) 5875 return None 5876 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5877 5878 def _parse_compress(self) -> exp.CompressColumnConstraint: 5879 if self._match(TokenType.L_PAREN, advance=False): 5880 return self.expression( 5881 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5882 
) 5883 5884 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5885 5886 def _parse_generated_as_identity( 5887 self, 5888 ) -> ( 5889 exp.GeneratedAsIdentityColumnConstraint 5890 | exp.ComputedColumnConstraint 5891 | exp.GeneratedAsRowColumnConstraint 5892 ): 5893 if self._match_text_seq("BY", "DEFAULT"): 5894 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5895 this = self.expression( 5896 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5897 ) 5898 else: 5899 self._match_text_seq("ALWAYS") 5900 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5901 5902 self._match(TokenType.ALIAS) 5903 5904 if self._match_text_seq("ROW"): 5905 start = self._match_text_seq("START") 5906 if not start: 5907 self._match(TokenType.END) 5908 hidden = self._match_text_seq("HIDDEN") 5909 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5910 5911 identity = self._match_text_seq("IDENTITY") 5912 5913 if self._match(TokenType.L_PAREN): 5914 if self._match(TokenType.START_WITH): 5915 this.set("start", self._parse_bitwise()) 5916 if self._match_text_seq("INCREMENT", "BY"): 5917 this.set("increment", self._parse_bitwise()) 5918 if self._match_text_seq("MINVALUE"): 5919 this.set("minvalue", self._parse_bitwise()) 5920 if self._match_text_seq("MAXVALUE"): 5921 this.set("maxvalue", self._parse_bitwise()) 5922 5923 if self._match_text_seq("CYCLE"): 5924 this.set("cycle", True) 5925 elif self._match_text_seq("NO", "CYCLE"): 5926 this.set("cycle", False) 5927 5928 if not identity: 5929 this.set("expression", self._parse_range()) 5930 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5931 args = self._parse_csv(self._parse_bitwise) 5932 this.set("start", seq_get(args, 0)) 5933 this.set("increment", seq_get(args, 1)) 5934 5935 self._match_r_paren() 5936 5937 return this 5938 5939 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5940 self._match_text_seq("LENGTH") 5941 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5942 5943 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5944 if self._match_text_seq("NULL"): 5945 return self.expression(exp.NotNullColumnConstraint) 5946 if self._match_text_seq("CASESPECIFIC"): 5947 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5948 if self._match_text_seq("FOR", "REPLICATION"): 5949 return self.expression(exp.NotForReplicationColumnConstraint) 5950 5951 # Unconsume the `NOT` token 5952 self._retreat(self._index - 1) 5953 return None 5954 5955 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5956 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5957 5958 procedure_option_follows = ( 5959 self._match(TokenType.WITH, advance=False) 5960 and self._next 5961 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5962 ) 5963 5964 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5965 return self.expression( 5966 exp.ColumnConstraint, 5967 this=this, 5968 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5969 ) 5970 5971 return this 5972 5973 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5974 if not self._match(TokenType.CONSTRAINT): 5975 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5976 5977 return self.expression( 5978 exp.Constraint, 5979 this=self._parse_id_var(), 5980 expressions=self._parse_unnamed_constraints(), 5981 ) 5982 5983 def 
_parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5984 constraints = [] 5985 while True: 5986 constraint = self._parse_unnamed_constraint() or self._parse_function() 5987 if not constraint: 5988 break 5989 constraints.append(constraint) 5990 5991 return constraints 5992 5993 def _parse_unnamed_constraint( 5994 self, constraints: t.Optional[t.Collection[str]] = None 5995 ) -> t.Optional[exp.Expression]: 5996 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5997 constraints or self.CONSTRAINT_PARSERS 5998 ): 5999 return None 6000 6001 constraint = self._prev.text.upper() 6002 if constraint not in self.CONSTRAINT_PARSERS: 6003 self.raise_error(f"No parser found for schema constraint {constraint}.") 6004 6005 return self.CONSTRAINT_PARSERS[constraint](self) 6006 6007 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6008 return self._parse_id_var(any_token=False) 6009 6010 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6011 self._match_text_seq("KEY") 6012 return self.expression( 6013 exp.UniqueColumnConstraint, 6014 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6015 this=self._parse_schema(self._parse_unique_key()), 6016 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6017 on_conflict=self._parse_on_conflict(), 6018 options=self._parse_key_constraint_options(), 6019 ) 6020 6021 def _parse_key_constraint_options(self) -> t.List[str]: 6022 options = [] 6023 while True: 6024 if not self._curr: 6025 break 6026 6027 if self._match(TokenType.ON): 6028 action = None 6029 on = self._advance_any() and self._prev.text 6030 6031 if self._match_text_seq("NO", "ACTION"): 6032 action = "NO ACTION" 6033 elif self._match_text_seq("CASCADE"): 6034 action = "CASCADE" 6035 elif self._match_text_seq("RESTRICT"): 6036 action = "RESTRICT" 6037 elif self._match_pair(TokenType.SET, TokenType.NULL): 6038 action = "SET NULL" 6039 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6040 action = "SET DEFAULT" 6041 else: 6042 self.raise_error("Invalid key constraint") 6043 6044 options.append(f"ON {on} {action}") 6045 else: 6046 var = self._parse_var_from_options( 6047 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6048 ) 6049 if not var: 6050 break 6051 options.append(var.name) 6052 6053 return options 6054 6055 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6056 if match and not self._match(TokenType.REFERENCES): 6057 return None 6058 6059 expressions = None 6060 this = self._parse_table(schema=True) 6061 options = self._parse_key_constraint_options() 6062 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6063 6064 def _parse_foreign_key(self) -> exp.ForeignKey: 6065 expressions = self._parse_wrapped_id_vars() 6066 reference = self._parse_references() 6067 on_options = {} 6068 6069 while self._match(TokenType.ON): 6070 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6071 self.raise_error("Expected DELETE or UPDATE") 6072 6073 kind = self._prev.text.lower() 6074 6075 if self._match_text_seq("NO", "ACTION"): 6076 action = "NO ACTION" 6077 elif self._match(TokenType.SET): 6078 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6079 action = "SET " + self._prev.text.upper() 6080 else: 6081 self._advance() 6082 action = self._prev.text.upper() 6083 6084 on_options[kind] = action 6085 6086 return self.expression( 6087 exp.ForeignKey, 6088 expressions=expressions, 6089 reference=reference, 6090 
options=self._parse_key_constraint_options(), 6091 **on_options, # type: ignore 6092 ) 6093 6094 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6095 return self._parse_ordered() or self._parse_field() 6096 6097 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6098 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6099 self._retreat(self._index - 1) 6100 return None 6101 6102 id_vars = self._parse_wrapped_id_vars() 6103 return self.expression( 6104 exp.PeriodForSystemTimeConstraint, 6105 this=seq_get(id_vars, 0), 6106 expression=seq_get(id_vars, 1), 6107 ) 6108 6109 def _parse_primary_key( 6110 self, wrapped_optional: bool = False, in_props: bool = False 6111 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6112 desc = ( 6113 self._match_set((TokenType.ASC, TokenType.DESC)) 6114 and self._prev.token_type == TokenType.DESC 6115 ) 6116 6117 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6118 return self.expression( 6119 exp.PrimaryKeyColumnConstraint, 6120 desc=desc, 6121 options=self._parse_key_constraint_options(), 6122 ) 6123 6124 expressions = self._parse_wrapped_csv( 6125 self._parse_primary_key_part, optional=wrapped_optional 6126 ) 6127 options = self._parse_key_constraint_options() 6128 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 6129 6130 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6131 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6132 6133 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6134 """ 6135 Parses a datetime column in ODBC format. We parse the column into the corresponding 6136 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6137 same as we did for `DATE('yyyy-mm-dd')`. 
6138 6139 Reference: 6140 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6141 """ 6142 self._match(TokenType.VAR) 6143 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6144 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6145 if not self._match(TokenType.R_BRACE): 6146 self.raise_error("Expected }") 6147 return expression 6148 6149 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6150 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6151 return this 6152 6153 bracket_kind = self._prev.token_type 6154 if ( 6155 bracket_kind == TokenType.L_BRACE 6156 and self._curr 6157 and self._curr.token_type == TokenType.VAR 6158 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6159 ): 6160 return self._parse_odbc_datetime_literal() 6161 6162 expressions = self._parse_csv( 6163 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6164 ) 6165 6166 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6167 self.raise_error("Expected ]") 6168 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6169 self.raise_error("Expected }") 6170 6171 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6172 if bracket_kind == TokenType.L_BRACE: 6173 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6174 elif not this: 6175 this = build_array_constructor( 6176 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6177 ) 6178 else: 6179 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6180 if constructor_type: 6181 return build_array_constructor( 6182 constructor_type, 6183 args=expressions, 6184 bracket_kind=bracket_kind, 6185 dialect=self.dialect, 6186 ) 6187 6188 expressions = apply_index_offset( 6189 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6190 ) 6191 this = self.expression(exp.Bracket, this=this, expressions=expressions) 6192 6193 self._add_comments(this) 6194 return self._parse_bracket(this) 6195 6196 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6197 if self._match(TokenType.COLON): 6198 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6199 return this 6200 6201 def _parse_case(self) -> t.Optional[exp.Expression]: 6202 ifs = [] 6203 default = None 6204 6205 comments = self._prev_comments 6206 expression = self._parse_assignment() 6207 6208 while self._match(TokenType.WHEN): 6209 this = self._parse_assignment() 6210 self._match(TokenType.THEN) 6211 then = self._parse_assignment() 6212 ifs.append(self.expression(exp.If, this=this, true=then)) 6213 6214 if self._match(TokenType.ELSE): 6215 default = self._parse_assignment() 6216 6217 if not self._match(TokenType.END): 6218 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6219 default = exp.column("interval") 6220 else: 6221 self.raise_error("Expected END after CASE", self._prev) 6222 6223 return self.expression( 6224 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6225 ) 6226 6227 def _parse_if(self) -> t.Optional[exp.Expression]: 6228 if self._match(TokenType.L_PAREN): 6229 args = self._parse_csv( 6230 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6231 ) 6232 this = self.validate_expression(exp.If.from_arg_list(args), args) 6233 self._match_r_paren() 6234 
else: 6235 index = self._index - 1 6236 6237 if self.NO_PAREN_IF_COMMANDS and index == 0: 6238 return self._parse_as_command(self._prev) 6239 6240 condition = self._parse_assignment() 6241 6242 if not condition: 6243 self._retreat(index) 6244 return None 6245 6246 self._match(TokenType.THEN) 6247 true = self._parse_assignment() 6248 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6249 self._match(TokenType.END) 6250 this = self.expression(exp.If, this=condition, true=true, false=false) 6251 6252 return this 6253 6254 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6255 if not self._match_text_seq("VALUE", "FOR"): 6256 self._retreat(self._index - 1) 6257 return None 6258 6259 return self.expression( 6260 exp.NextValueFor, 6261 this=self._parse_column(), 6262 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6263 ) 6264 6265 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6266 this = self._parse_function() or self._parse_var_or_string(upper=True) 6267 6268 if self._match(TokenType.FROM): 6269 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6270 6271 if not self._match(TokenType.COMMA): 6272 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6273 6274 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6275 6276 def _parse_gap_fill(self) -> exp.GapFill: 6277 self._match(TokenType.TABLE) 6278 this = self._parse_table() 6279 6280 self._match(TokenType.COMMA) 6281 args = [this, *self._parse_csv(self._parse_lambda)] 6282 6283 gap_fill = exp.GapFill.from_arg_list(args) 6284 return self.validate_expression(gap_fill, args) 6285 6286 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6287 this = self._parse_assignment() 6288 6289 if not self._match(TokenType.ALIAS): 6290 if self._match(TokenType.COMMA): 6291 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6292 6293 self.raise_error("Expected AS after CAST") 6294 6295 fmt = None 6296 to = self._parse_types() 6297 6298 default = self._match(TokenType.DEFAULT) 6299 if default: 6300 default = self._parse_bitwise() 6301 self._match_text_seq("ON", "CONVERSION", "ERROR") 6302 6303 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6304 fmt_string = self._parse_string() 6305 fmt = self._parse_at_time_zone(fmt_string) 6306 6307 if not to: 6308 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6309 if to.this in exp.DataType.TEMPORAL_TYPES: 6310 this = self.expression( 6311 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6312 this=this, 6313 format=exp.Literal.string( 6314 format_time( 6315 fmt_string.this if fmt_string else "", 6316 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6317 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6318 ) 6319 ), 6320 safe=safe, 6321 ) 6322 6323 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6324 this.set("zone", fmt.args["zone"]) 6325 return this 6326 elif not to: 6327 self.raise_error("Expected TYPE after CAST") 6328 elif isinstance(to, exp.Identifier): 6329 to = exp.DataType.build(to.name, udt=True) 6330 elif to.this == exp.DataType.Type.CHAR: 6331 if self._match(TokenType.CHARACTER_SET): 6332 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6333 6334 return self.expression( 6335 exp.Cast if strict else exp.TryCast, 6336 this=this, 6337 to=to, 6338 format=fmt, 6339 safe=safe, 6340 
action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6341 default=default, 6342 ) 6343 6344 def _parse_string_agg(self) -> exp.GroupConcat: 6345 if self._match(TokenType.DISTINCT): 6346 args: t.List[t.Optional[exp.Expression]] = [ 6347 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6348 ] 6349 if self._match(TokenType.COMMA): 6350 args.extend(self._parse_csv(self._parse_assignment)) 6351 else: 6352 args = self._parse_csv(self._parse_assignment) # type: ignore 6353 6354 if self._match_text_seq("ON", "OVERFLOW"): 6355 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6356 if self._match_text_seq("ERROR"): 6357 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6358 else: 6359 self._match_text_seq("TRUNCATE") 6360 on_overflow = self.expression( 6361 exp.OverflowTruncateBehavior, 6362 this=self._parse_string(), 6363 with_count=( 6364 self._match_text_seq("WITH", "COUNT") 6365 or not self._match_text_seq("WITHOUT", "COUNT") 6366 ), 6367 ) 6368 else: 6369 on_overflow = None 6370 6371 index = self._index 6372 if not self._match(TokenType.R_PAREN) and args: 6373 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6374 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6375 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6376 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6377 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6378 6379 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6380 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6381 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
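# [Editor's note - illustrative sketch, not part of the upstream source] The net effect of this
# canonicalization is that STRING_AGG-style aggregates (and LISTAGG in the dialects that route
# it here) all end up as a single exp.GroupConcat node, which is what makes them transpilable
# across dialects. Assuming only the public API (the exact rendered SQL depends on the target
# dialect):
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> sqlglot.parse_one("SELECT STRING_AGG(x, ',')", read="postgres").find(exp.GroupConcat) is not None
#     True
#     >>> sqlglot.transpile("SELECT STRING_AGG(x, ',')", read="postgres", write="mysql")[0]  # renders a GROUP_CONCAT call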
6382 if not self._match_text_seq("WITHIN", "GROUP"): 6383 self._retreat(index) 6384 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6385 6386 # The corresponding match_r_paren will be called in parse_function (caller) 6387 self._match_l_paren() 6388 6389 return self.expression( 6390 exp.GroupConcat, 6391 this=self._parse_order(this=seq_get(args, 0)), 6392 separator=seq_get(args, 1), 6393 on_overflow=on_overflow, 6394 ) 6395 6396 def _parse_convert( 6397 self, strict: bool, safe: t.Optional[bool] = None 6398 ) -> t.Optional[exp.Expression]: 6399 this = self._parse_bitwise() 6400 6401 if self._match(TokenType.USING): 6402 to: t.Optional[exp.Expression] = self.expression( 6403 exp.CharacterSet, this=self._parse_var() 6404 ) 6405 elif self._match(TokenType.COMMA): 6406 to = self._parse_types() 6407 else: 6408 to = None 6409 6410 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6411 6412 def _parse_xml_table(self) -> exp.XMLTable: 6413 namespaces = None 6414 passing = None 6415 columns = None 6416 6417 if self._match_text_seq("XMLNAMESPACES", "("): 6418 namespaces = self._parse_xml_namespace() 6419 self._match_text_seq(")", ",") 6420 6421 this = self._parse_string() 6422 6423 if self._match_text_seq("PASSING"): 6424 # The BY VALUE keywords are optional and are provided for semantic clarity 6425 self._match_text_seq("BY", "VALUE") 6426 passing = self._parse_csv(self._parse_column) 6427 6428 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6429 6430 if self._match_text_seq("COLUMNS"): 6431 columns = self._parse_csv(self._parse_field_def) 6432 6433 return self.expression( 6434 exp.XMLTable, 6435 this=this, 6436 namespaces=namespaces, 6437 passing=passing, 6438 columns=columns, 6439 by_ref=by_ref, 6440 ) 6441 6442 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6443 namespaces = [] 6444 6445 while True: 6446 if self._match(TokenType.DEFAULT): 6447 uri = self._parse_string() 6448 else: 6449 uri = self._parse_alias(self._parse_string()) 6450 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6451 if not self._match(TokenType.COMMA): 6452 break 6453 6454 return namespaces 6455 6456 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6457 """ 6458 There are generally two variants of the DECODE function: 6459 6460 - DECODE(bin, charset) 6461 - DECODE(expression, search, result [, search, result] ... [, default]) 6462 6463 The second variant will always be parsed into a CASE expression. Note that NULL 6464 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6465 instead of relying on pattern matching. 
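Editor's note (an approximate illustration, not upstream documentation): with literal
searches and a trailing default, the second variant expands roughly as

    DECODE(x, 1, 'one', NULL, 'none', 'other')
    --> CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none' ELSE 'other' END

Non-literal, non-NULL searches additionally get an "x IS NULL AND search IS NULL" branch
so that two NULLs compare as a match.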
6466 """ 6467 args = self._parse_csv(self._parse_assignment) 6468 6469 if len(args) < 3: 6470 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6471 6472 expression, *expressions = args 6473 if not expression: 6474 return None 6475 6476 ifs = [] 6477 for search, result in zip(expressions[::2], expressions[1::2]): 6478 if not search or not result: 6479 return None 6480 6481 if isinstance(search, exp.Literal): 6482 ifs.append( 6483 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6484 ) 6485 elif isinstance(search, exp.Null): 6486 ifs.append( 6487 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6488 ) 6489 else: 6490 cond = exp.or_( 6491 exp.EQ(this=expression.copy(), expression=search), 6492 exp.and_( 6493 exp.Is(this=expression.copy(), expression=exp.Null()), 6494 exp.Is(this=search.copy(), expression=exp.Null()), 6495 copy=False, 6496 ), 6497 copy=False, 6498 ) 6499 ifs.append(exp.If(this=cond, true=result)) 6500 6501 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6502 6503 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6504 self._match_text_seq("KEY") 6505 key = self._parse_column() 6506 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6507 self._match_text_seq("VALUE") 6508 value = self._parse_bitwise() 6509 6510 if not key and not value: 6511 return None 6512 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6513 6514 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6515 if not this or not self._match_text_seq("FORMAT", "JSON"): 6516 return this 6517 6518 return self.expression(exp.FormatJson, this=this) 6519 6520 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6521 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6522 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6523 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6524 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6525 else: 6526 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6527 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6528 6529 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6530 6531 if not empty and not error and not null: 6532 return None 6533 6534 return self.expression( 6535 exp.OnCondition, 6536 empty=empty, 6537 error=error, 6538 null=null, 6539 ) 6540 6541 def _parse_on_handling( 6542 self, on: str, *values: str 6543 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6544 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6545 for value in values: 6546 if self._match_text_seq(value, "ON", on): 6547 return f"{value} ON {on}" 6548 6549 index = self._index 6550 if self._match(TokenType.DEFAULT): 6551 default_value = self._parse_bitwise() 6552 if self._match_text_seq("ON", on): 6553 return default_value 6554 6555 self._retreat(index) 6556 6557 return None 6558 6559 @t.overload 6560 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6561 6562 @t.overload 6563 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6564 6565 def _parse_json_object(self, agg=False): 6566 star = self._parse_star() 6567 expressions = ( 6568 [star] 6569 if star 6570 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6571 ) 6572 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6573 6574 unique_keys = None 6575 if self._match_text_seq("WITH", "UNIQUE"): 6576 unique_keys = True 6577 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6578 unique_keys = False 6579 6580 self._match_text_seq("KEYS") 6581 6582 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6583 self._parse_type() 6584 ) 6585 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6586 6587 return self.expression( 6588 exp.JSONObjectAgg if agg else exp.JSONObject, 6589 expressions=expressions, 6590 null_handling=null_handling, 6591 unique_keys=unique_keys, 6592 return_type=return_type, 6593 encoding=encoding, 6594 ) 6595 6596 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6597 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6598 if not self._match_text_seq("NESTED"): 6599 this = self._parse_id_var() 6600 kind = self._parse_types(allow_identifiers=False) 6601 nested = None 6602 else: 6603 this = None 6604 kind = None 6605 nested = True 6606 6607 path = self._match_text_seq("PATH") and self._parse_string() 6608 nested_schema = nested and self._parse_json_schema() 6609 6610 return self.expression( 6611 exp.JSONColumnDef, 6612 this=this, 6613 kind=kind, 6614 path=path, 6615 nested_schema=nested_schema, 6616 ) 6617 6618 def _parse_json_schema(self) -> exp.JSONSchema: 6619 self._match_text_seq("COLUMNS") 6620 return self.expression( 6621 exp.JSONSchema, 6622 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6623 ) 6624 6625 def _parse_json_table(self) -> exp.JSONTable: 6626 this = self._parse_format_json(self._parse_bitwise()) 6627 path = self._match(TokenType.COMMA) and self._parse_string() 6628 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6629 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6630 schema = self._parse_json_schema() 6631 6632 return exp.JSONTable( 6633 this=this, 6634 schema=schema, 6635 path=path, 6636 error_handling=error_handling, 6637 empty_handling=empty_handling, 6638 ) 6639 6640 def _parse_match_against(self) -> exp.MatchAgainst: 6641 expressions = self._parse_csv(self._parse_column) 6642 6643 self._match_text_seq(")", "AGAINST", "(") 6644 6645 this = self._parse_string() 6646 6647 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6648 modifier = "IN NATURAL LANGUAGE MODE" 6649 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6650 modifier = f"{modifier} WITH QUERY EXPANSION" 6651 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6652 modifier = "IN BOOLEAN MODE" 6653 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6654 modifier = "WITH QUERY EXPANSION" 6655 else: 6656 modifier = None 6657 6658 return self.expression( 6659 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6660 ) 6661 6662 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6663 def _parse_open_json(self) -> exp.OpenJSON: 6664 this = self._parse_bitwise() 6665 path = self._match(TokenType.COMMA) and self._parse_string() 6666 6667 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6668 this = self._parse_field(any_token=True) 6669 kind = self._parse_types() 6670 path = 
self._parse_string() 6671 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6672 6673 return self.expression( 6674 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6675 ) 6676 6677 expressions = None 6678 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6679 self._match_l_paren() 6680 expressions = self._parse_csv(_parse_open_json_column_def) 6681 6682 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6683 6684 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6685 args = self._parse_csv(self._parse_bitwise) 6686 6687 if self._match(TokenType.IN): 6688 return self.expression( 6689 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6690 ) 6691 6692 if haystack_first: 6693 haystack = seq_get(args, 0) 6694 needle = seq_get(args, 1) 6695 else: 6696 haystack = seq_get(args, 1) 6697 needle = seq_get(args, 0) 6698 6699 return self.expression( 6700 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6701 ) 6702 6703 def _parse_predict(self) -> exp.Predict: 6704 self._match_text_seq("MODEL") 6705 this = self._parse_table() 6706 6707 self._match(TokenType.COMMA) 6708 self._match_text_seq("TABLE") 6709 6710 return self.expression( 6711 exp.Predict, 6712 this=this, 6713 expression=self._parse_table(), 6714 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6715 ) 6716 6717 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6718 args = self._parse_csv(self._parse_table) 6719 return exp.JoinHint(this=func_name.upper(), expressions=args) 6720 6721 def _parse_substring(self) -> exp.Substring: 6722 # Postgres supports the form: substring(string [from int] [for int]) 6723 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6724 6725 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6726 6727 if self._match(TokenType.FROM): 6728 args.append(self._parse_bitwise()) 6729 if self._match(TokenType.FOR): 6730 if len(args) == 1: 6731 args.append(exp.Literal.number(1)) 6732 args.append(self._parse_bitwise()) 6733 6734 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6735 6736 def _parse_trim(self) -> exp.Trim: 6737 # https://www.w3resource.com/sql/character-functions/trim.php 6738 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6739 6740 position = None 6741 collation = None 6742 expression = None 6743 6744 if self._match_texts(self.TRIM_TYPES): 6745 position = self._prev.text.upper() 6746 6747 this = self._parse_bitwise() 6748 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6749 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6750 expression = self._parse_bitwise() 6751 6752 if invert_order: 6753 this, expression = expression, this 6754 6755 if self._match(TokenType.COLLATE): 6756 collation = self._parse_bitwise() 6757 6758 return self.expression( 6759 exp.Trim, this=this, position=position, expression=expression, collation=collation 6760 ) 6761 6762 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6763 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6764 6765 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6766 return self._parse_window(self._parse_id_var(), alias=True) 6767 6768 def _parse_respect_or_ignore_nulls( 6769 self, this: t.Optional[exp.Expression] 6770 ) -> t.Optional[exp.Expression]: 6771 if self._match_text_seq("IGNORE", "NULLS"): 
6772 return self.expression(exp.IgnoreNulls, this=this) 6773 if self._match_text_seq("RESPECT", "NULLS"): 6774 return self.expression(exp.RespectNulls, this=this) 6775 return this 6776 6777 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6778 if self._match(TokenType.HAVING): 6779 self._match_texts(("MAX", "MIN")) 6780 max = self._prev.text.upper() != "MIN" 6781 return self.expression( 6782 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6783 ) 6784 6785 return this 6786 6787 def _parse_window( 6788 self, this: t.Optional[exp.Expression], alias: bool = False 6789 ) -> t.Optional[exp.Expression]: 6790 func = this 6791 comments = func.comments if isinstance(func, exp.Expression) else None 6792 6793 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6794 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6795 if self._match_text_seq("WITHIN", "GROUP"): 6796 order = self._parse_wrapped(self._parse_order) 6797 this = self.expression(exp.WithinGroup, this=this, expression=order) 6798 6799 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6800 self._match(TokenType.WHERE) 6801 this = self.expression( 6802 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6803 ) 6804 self._match_r_paren() 6805 6806 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6807 # Some dialects choose to implement and some do not. 6808 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6809 6810 # There is some code above in _parse_lambda that handles 6811 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6812 6813 # The below changes handle 6814 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6815 6816 # Oracle allows both formats 6817 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6818 # and Snowflake chose to do the same for familiarity 6819 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6820 if isinstance(this, exp.AggFunc): 6821 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6822 6823 if ignore_respect and ignore_respect is not this: 6824 ignore_respect.replace(ignore_respect.this) 6825 this = self.expression(ignore_respect.__class__, this=this) 6826 6827 this = self._parse_respect_or_ignore_nulls(this) 6828 6829 # bigquery select from window x AS (partition by ...) 
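# [Editor's note - illustrative sketch, not part of the upstream source] Because of the rewrite
# above, both accepted placements of the NULLS qualifier should normalize to the same tree,
# while the alias=True path right below handles the BigQuery-style named window (WINDOW w AS
# (PARTITION BY ...)) case. Assuming only the public API:
#     >>> import sqlglot
#     >>> a = sqlglot.parse_one("SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t")
#     >>> b = sqlglot.parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t")
#     >>> a.sql() == b.sql()  # expected to be equal: both carry exp.IgnoreNulls around the window function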
6830 if alias: 6831 over = None 6832 self._match(TokenType.ALIAS) 6833 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6834 return this 6835 else: 6836 over = self._prev.text.upper() 6837 6838 if comments and isinstance(func, exp.Expression): 6839 func.pop_comments() 6840 6841 if not self._match(TokenType.L_PAREN): 6842 return self.expression( 6843 exp.Window, 6844 comments=comments, 6845 this=this, 6846 alias=self._parse_id_var(False), 6847 over=over, 6848 ) 6849 6850 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6851 6852 first = self._match(TokenType.FIRST) 6853 if self._match_text_seq("LAST"): 6854 first = False 6855 6856 partition, order = self._parse_partition_and_order() 6857 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6858 6859 if kind: 6860 self._match(TokenType.BETWEEN) 6861 start = self._parse_window_spec() 6862 self._match(TokenType.AND) 6863 end = self._parse_window_spec() 6864 6865 spec = self.expression( 6866 exp.WindowSpec, 6867 kind=kind, 6868 start=start["value"], 6869 start_side=start["side"], 6870 end=end["value"], 6871 end_side=end["side"], 6872 ) 6873 else: 6874 spec = None 6875 6876 self._match_r_paren() 6877 6878 window = self.expression( 6879 exp.Window, 6880 comments=comments, 6881 this=this, 6882 partition_by=partition, 6883 order=order, 6884 spec=spec, 6885 alias=window_alias, 6886 over=over, 6887 first=first, 6888 ) 6889 6890 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6891 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6892 return self._parse_window(window, alias=alias) 6893 6894 return window 6895 6896 def _parse_partition_and_order( 6897 self, 6898 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6899 return self._parse_partition_by(), self._parse_order() 6900 6901 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6902 self._match(TokenType.BETWEEN) 6903 6904 return { 6905 "value": ( 6906 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6907 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6908 or self._parse_bitwise() 6909 ), 6910 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6911 } 6912 6913 def _parse_alias( 6914 self, this: t.Optional[exp.Expression], explicit: bool = False 6915 ) -> t.Optional[exp.Expression]: 6916 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 6917 # so this section tries to parse the clause version and if it fails, it treats the token 6918 # as an identifier (alias) 6919 if self._can_parse_limit_or_offset(): 6920 return this 6921 6922 any_token = self._match(TokenType.ALIAS) 6923 comments = self._prev_comments or [] 6924 6925 if explicit and not any_token: 6926 return this 6927 6928 if self._match(TokenType.L_PAREN): 6929 aliases = self.expression( 6930 exp.Aliases, 6931 comments=comments, 6932 this=this, 6933 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6934 ) 6935 self._match_r_paren(aliases) 6936 return aliases 6937 6938 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6939 self.STRING_ALIASES and self._parse_string_as_identifier() 6940 ) 6941 6942 if alias: 6943 comments.extend(alias.pop_comments()) 6944 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6945 column = this.this 6946 6947 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6948 if not this.comments and column and 
column.comments: 6949 this.comments = column.pop_comments() 6950 6951 return this 6952 6953 def _parse_id_var( 6954 self, 6955 any_token: bool = True, 6956 tokens: t.Optional[t.Collection[TokenType]] = None, 6957 ) -> t.Optional[exp.Expression]: 6958 expression = self._parse_identifier() 6959 if not expression and ( 6960 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6961 ): 6962 quoted = self._prev.token_type == TokenType.STRING 6963 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6964 6965 return expression 6966 6967 def _parse_string(self) -> t.Optional[exp.Expression]: 6968 if self._match_set(self.STRING_PARSERS): 6969 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6970 return self._parse_placeholder() 6971 6972 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6973 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6974 6975 def _parse_number(self) -> t.Optional[exp.Expression]: 6976 if self._match_set(self.NUMERIC_PARSERS): 6977 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6978 return self._parse_placeholder() 6979 6980 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6981 if self._match(TokenType.IDENTIFIER): 6982 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6983 return self._parse_placeholder() 6984 6985 def _parse_var( 6986 self, 6987 any_token: bool = False, 6988 tokens: t.Optional[t.Collection[TokenType]] = None, 6989 upper: bool = False, 6990 ) -> t.Optional[exp.Expression]: 6991 if ( 6992 (any_token and self._advance_any()) 6993 or self._match(TokenType.VAR) 6994 or (self._match_set(tokens) if tokens else False) 6995 ): 6996 return self.expression( 6997 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6998 ) 6999 return self._parse_placeholder() 7000 7001 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7002 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7003 self._advance() 7004 return self._prev 7005 return None 7006 7007 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7008 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7009 7010 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7011 return self._parse_primary() or self._parse_var(any_token=True) 7012 7013 def _parse_null(self) -> t.Optional[exp.Expression]: 7014 if self._match_set(self.NULL_TOKENS): 7015 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7016 return self._parse_placeholder() 7017 7018 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7019 if self._match(TokenType.TRUE): 7020 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7021 if self._match(TokenType.FALSE): 7022 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7023 return self._parse_placeholder() 7024 7025 def _parse_star(self) -> t.Optional[exp.Expression]: 7026 if self._match(TokenType.STAR): 7027 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7028 return self._parse_placeholder() 7029 7030 def _parse_parameter(self) -> exp.Parameter: 7031 this = self._parse_identifier() or self._parse_primary_or_var() 7032 return self.expression(exp.Parameter, this=this) 7033 7034 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7035 if self._match_set(self.PLACEHOLDER_PARSERS): 7036 placeholder = 
self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7037 if placeholder: 7038 return placeholder 7039 self._advance(-1) 7040 return None 7041 7042 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7043 if not self._match_texts(keywords): 7044 return None 7045 if self._match(TokenType.L_PAREN, advance=False): 7046 return self._parse_wrapped_csv(self._parse_expression) 7047 7048 expression = self._parse_expression() 7049 return [expression] if expression else None 7050 7051 def _parse_csv( 7052 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7053 ) -> t.List[exp.Expression]: 7054 parse_result = parse_method() 7055 items = [parse_result] if parse_result is not None else [] 7056 7057 while self._match(sep): 7058 self._add_comments(parse_result) 7059 parse_result = parse_method() 7060 if parse_result is not None: 7061 items.append(parse_result) 7062 7063 return items 7064 7065 def _parse_tokens( 7066 self, parse_method: t.Callable, expressions: t.Dict 7067 ) -> t.Optional[exp.Expression]: 7068 this = parse_method() 7069 7070 while self._match_set(expressions): 7071 this = self.expression( 7072 expressions[self._prev.token_type], 7073 this=this, 7074 comments=self._prev_comments, 7075 expression=parse_method(), 7076 ) 7077 7078 return this 7079 7080 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7081 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7082 7083 def _parse_wrapped_csv( 7084 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7085 ) -> t.List[exp.Expression]: 7086 return self._parse_wrapped( 7087 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7088 ) 7089 7090 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7091 wrapped = self._match(TokenType.L_PAREN) 7092 if not wrapped and not optional: 7093 self.raise_error("Expecting (") 7094 parse_result = parse_method() 7095 if wrapped: 7096 self._match_r_paren() 7097 return parse_result 7098 7099 def _parse_expressions(self) -> t.List[exp.Expression]: 7100 return self._parse_csv(self._parse_expression) 7101 7102 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7103 return self._parse_select() or self._parse_set_operations( 7104 self._parse_alias(self._parse_assignment(), explicit=True) 7105 if alias 7106 else self._parse_assignment() 7107 ) 7108 7109 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7110 return self._parse_query_modifiers( 7111 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7112 ) 7113 7114 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7115 this = None 7116 if self._match_texts(self.TRANSACTION_KIND): 7117 this = self._prev.text 7118 7119 self._match_texts(("TRANSACTION", "WORK")) 7120 7121 modes = [] 7122 while True: 7123 mode = [] 7124 while self._match(TokenType.VAR): 7125 mode.append(self._prev.text) 7126 7127 if mode: 7128 modes.append(" ".join(mode)) 7129 if not self._match(TokenType.COMMA): 7130 break 7131 7132 return self.expression(exp.Transaction, this=this, modes=modes) 7133 7134 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7135 chain = None 7136 savepoint = None 7137 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7138 7139 self._match_texts(("TRANSACTION", "WORK")) 7140 7141 if self._match_text_seq("TO"): 7142 self._match_text_seq("SAVEPOINT") 7143 savepoint = self._parse_id_var() 
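        # Illustrative examples (assumed, not part of the original source) of the
        # statements handled here:
        #   ROLLBACK TO SAVEPOINT sp1   -- savepoint branch above
        #   COMMIT WORK AND NO CHAIN    -- chain branch below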
7144 7145 if self._match(TokenType.AND): 7146 chain = not self._match_text_seq("NO") 7147 self._match_text_seq("CHAIN") 7148 7149 if is_rollback: 7150 return self.expression(exp.Rollback, savepoint=savepoint) 7151 7152 return self.expression(exp.Commit, chain=chain) 7153 7154 def _parse_refresh(self) -> exp.Refresh: 7155 self._match(TokenType.TABLE) 7156 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7157 7158 def _parse_add_column(self) -> t.Optional[exp.Expression]: 7159 if not self._match_text_seq("ADD"): 7160 return None 7161 7162 self._match(TokenType.COLUMN) 7163 exists_column = self._parse_exists(not_=True) 7164 expression = self._parse_field_def() 7165 7166 if expression: 7167 expression.set("exists", exists_column) 7168 7169 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7170 if self._match_texts(("FIRST", "AFTER")): 7171 position = self._prev.text 7172 column_position = self.expression( 7173 exp.ColumnPosition, this=self._parse_column(), position=position 7174 ) 7175 expression.set("position", column_position) 7176 7177 return expression 7178 7179 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7180 drop = self._match(TokenType.DROP) and self._parse_drop() 7181 if drop and not isinstance(drop, exp.Command): 7182 drop.set("kind", drop.args.get("kind", "COLUMN")) 7183 return drop 7184 7185 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7186 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7187 return self.expression( 7188 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7189 ) 7190 7191 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7192 index = self._index - 1 7193 7194 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7195 return self._parse_csv( 7196 lambda: self.expression( 7197 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7198 ) 7199 ) 7200 7201 self._retreat(index) 7202 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 7203 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 7204 7205 if self._match_text_seq("ADD", "COLUMNS"): 7206 schema = self._parse_schema() 7207 if schema: 7208 return [schema] 7209 return [] 7210 7211 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 7212 7213 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7214 if self._match_texts(self.ALTER_ALTER_PARSERS): 7215 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7216 7217 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7218 # keyword after ALTER we default to parsing this statement 7219 self._match(TokenType.COLUMN) 7220 column = self._parse_field(any_token=True) 7221 7222 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7223 return self.expression(exp.AlterColumn, this=column, drop=True) 7224 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7225 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7226 if self._match(TokenType.COMMENT): 7227 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7228 if self._match_text_seq("DROP", "NOT", "NULL"): 7229 return self.expression( 7230 exp.AlterColumn, 7231 this=column, 7232 drop=True, 7233 allow_null=True, 7234 ) 7235 if self._match_text_seq("SET", "NOT", "NULL"): 7236 return self.expression( 7237 
exp.AlterColumn, 7238 this=column, 7239 allow_null=False, 7240 ) 7241 7242 if self._match_text_seq("SET", "VISIBLE"): 7243 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7244 if self._match_text_seq("SET", "INVISIBLE"): 7245 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7246 7247 self._match_text_seq("SET", "DATA") 7248 self._match_text_seq("TYPE") 7249 return self.expression( 7250 exp.AlterColumn, 7251 this=column, 7252 dtype=self._parse_types(), 7253 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7254 using=self._match(TokenType.USING) and self._parse_assignment(), 7255 ) 7256 7257 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7258 if self._match_texts(("ALL", "EVEN", "AUTO")): 7259 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7260 7261 self._match_text_seq("KEY", "DISTKEY") 7262 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7263 7264 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7265 if compound: 7266 self._match_text_seq("SORTKEY") 7267 7268 if self._match(TokenType.L_PAREN, advance=False): 7269 return self.expression( 7270 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7271 ) 7272 7273 self._match_texts(("AUTO", "NONE")) 7274 return self.expression( 7275 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7276 ) 7277 7278 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7279 index = self._index - 1 7280 7281 partition_exists = self._parse_exists() 7282 if self._match(TokenType.PARTITION, advance=False): 7283 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7284 7285 self._retreat(index) 7286 return self._parse_csv(self._parse_drop_column) 7287 7288 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7289 if self._match(TokenType.COLUMN): 7290 exists = self._parse_exists() 7291 old_column = self._parse_column() 7292 to = self._match_text_seq("TO") 7293 new_column = self._parse_column() 7294 7295 if old_column is None or to is None or new_column is None: 7296 return None 7297 7298 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7299 7300 self._match_text_seq("TO") 7301 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7302 7303 def _parse_alter_table_set(self) -> exp.AlterSet: 7304 alter_set = self.expression(exp.AlterSet) 7305 7306 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7307 "TABLE", "PROPERTIES" 7308 ): 7309 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7310 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7311 alter_set.set("expressions", [self._parse_assignment()]) 7312 elif self._match_texts(("LOGGED", "UNLOGGED")): 7313 alter_set.set("option", exp.var(self._prev.text.upper())) 7314 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7315 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7316 elif self._match_text_seq("LOCATION"): 7317 alter_set.set("location", self._parse_field()) 7318 elif self._match_text_seq("ACCESS", "METHOD"): 7319 alter_set.set("access_method", self._parse_field()) 7320 elif self._match_text_seq("TABLESPACE"): 7321 alter_set.set("tablespace", self._parse_field()) 7322 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7323 
alter_set.set("file_format", [self._parse_field()]) 7324 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7325 alter_set.set("file_format", self._parse_wrapped_options()) 7326 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7327 alter_set.set("copy_options", self._parse_wrapped_options()) 7328 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7329 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7330 else: 7331 if self._match_text_seq("SERDE"): 7332 alter_set.set("serde", self._parse_field()) 7333 7334 alter_set.set("expressions", [self._parse_properties()]) 7335 7336 return alter_set 7337 7338 def _parse_alter(self) -> exp.Alter | exp.Command: 7339 start = self._prev 7340 7341 alter_token = self._match_set(self.ALTERABLES) and self._prev 7342 if not alter_token: 7343 return self._parse_as_command(start) 7344 7345 exists = self._parse_exists() 7346 only = self._match_text_seq("ONLY") 7347 this = self._parse_table(schema=True) 7348 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7349 7350 if self._next: 7351 self._advance() 7352 7353 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7354 if parser: 7355 actions = ensure_list(parser(self)) 7356 not_valid = self._match_text_seq("NOT", "VALID") 7357 options = self._parse_csv(self._parse_property) 7358 7359 if not self._curr and actions: 7360 return self.expression( 7361 exp.Alter, 7362 this=this, 7363 kind=alter_token.text.upper(), 7364 exists=exists, 7365 actions=actions, 7366 only=only, 7367 options=options, 7368 cluster=cluster, 7369 not_valid=not_valid, 7370 ) 7371 7372 return self._parse_as_command(start) 7373 7374 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7375 start = self._prev 7376 # https://duckdb.org/docs/sql/statements/analyze 7377 if not self._curr: 7378 return self.expression(exp.Analyze) 7379 7380 options = [] 7381 while self._match_texts(self.ANALYZE_STYLES): 7382 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7383 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7384 else: 7385 options.append(self._prev.text.upper()) 7386 7387 this: t.Optional[exp.Expression] = None 7388 inner_expression: t.Optional[exp.Expression] = None 7389 7390 kind = self._curr and self._curr.text.upper() 7391 7392 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7393 this = self._parse_table_parts() 7394 elif self._match_text_seq("TABLES"): 7395 if self._match_set((TokenType.FROM, TokenType.IN)): 7396 kind = f"{kind} {self._prev.text.upper()}" 7397 this = self._parse_table(schema=True, is_db_reference=True) 7398 elif self._match_text_seq("DATABASE"): 7399 this = self._parse_table(schema=True, is_db_reference=True) 7400 elif self._match_text_seq("CLUSTER"): 7401 this = self._parse_table() 7402 # Try matching inner expr keywords before fallback to parse table. 
7403 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7404 kind = None 7405 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7406 else: 7407 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7408 kind = None 7409 this = self._parse_table_parts() 7410 7411 partition = self._try_parse(self._parse_partition) 7412 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7413 return self._parse_as_command(start) 7414 7415 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7416 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7417 "WITH", "ASYNC", "MODE" 7418 ): 7419 mode = f"WITH {self._tokens[self._index-2].text.upper()} MODE" 7420 else: 7421 mode = None 7422 7423 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7424 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7425 7426 properties = self._parse_properties() 7427 return self.expression( 7428 exp.Analyze, 7429 kind=kind, 7430 this=this, 7431 mode=mode, 7432 partition=partition, 7433 properties=properties, 7434 expression=inner_expression, 7435 options=options, 7436 ) 7437 7438 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7439 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7440 this = None 7441 kind = self._prev.text.upper() 7442 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7443 expressions = [] 7444 7445 if not self._match_text_seq("STATISTICS"): 7446 self.raise_error("Expecting token STATISTICS") 7447 7448 if self._match_text_seq("NOSCAN"): 7449 this = "NOSCAN" 7450 elif self._match(TokenType.FOR): 7451 if self._match_text_seq("ALL", "COLUMNS"): 7452 this = "FOR ALL COLUMNS" 7453 if self._match_texts("COLUMNS"): 7454 this = "FOR COLUMNS" 7455 expressions = self._parse_csv(self._parse_column_reference) 7456 elif self._match_text_seq("SAMPLE"): 7457 sample = self._parse_number() 7458 expressions = [ 7459 self.expression( 7460 exp.AnalyzeSample, 7461 sample=sample, 7462 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7463 ) 7464 ] 7465 7466 return self.expression( 7467 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7468 ) 7469 7470 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7471 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7472 kind = None 7473 this = None 7474 expression: t.Optional[exp.Expression] = None 7475 if self._match_text_seq("REF", "UPDATE"): 7476 kind = "REF" 7477 this = "UPDATE" 7478 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7479 this = "UPDATE SET DANGLING TO NULL" 7480 elif self._match_text_seq("STRUCTURE"): 7481 kind = "STRUCTURE" 7482 if self._match_text_seq("CASCADE", "FAST"): 7483 this = "CASCADE FAST" 7484 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7485 ("ONLINE", "OFFLINE") 7486 ): 7487 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7488 expression = self._parse_into() 7489 7490 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7491 7492 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7493 this = self._prev.text.upper() 7494 if self._match_text_seq("COLUMNS"): 7495 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7496 return None 7497 7498 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7499 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7500 if self._match_text_seq("STATISTICS"): 7501 return self.expression(exp.AnalyzeDelete, kind=kind) 7502 return None 7503 7504 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7505 if self._match_text_seq("CHAINED", "ROWS"): 7506 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7507 return None 7508 7509 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7510 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7511 this = self._prev.text.upper() 7512 expression: t.Optional[exp.Expression] = None 7513 expressions = [] 7514 update_options = None 7515 7516 if self._match_text_seq("HISTOGRAM", "ON"): 7517 expressions = self._parse_csv(self._parse_column_reference) 7518 with_expressions = [] 7519 while self._match(TokenType.WITH): 7520 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7521 if self._match_texts(("SYNC", "ASYNC")): 7522 if self._match_text_seq("MODE", advance=False): 7523 with_expressions.append(f"{self._prev.text.upper()} MODE") 7524 self._advance() 7525 else: 7526 buckets = self._parse_number() 7527 if self._match_text_seq("BUCKETS"): 7528 with_expressions.append(f"{buckets} BUCKETS") 7529 if with_expressions: 7530 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7531 7532 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7533 TokenType.UPDATE, advance=False 7534 ): 7535 update_options = self._prev.text.upper() 7536 self._advance() 7537 elif self._match_text_seq("USING", "DATA"): 7538 expression = self.expression(exp.UsingData, this=self._parse_string()) 7539 7540 return self.expression( 7541 exp.AnalyzeHistogram, 7542 this=this, 7543 expressions=expressions, 7544 expression=expression, 7545 update_options=update_options, 7546 ) 7547 7548 def _parse_merge(self) -> exp.Merge: 7549 self._match(TokenType.INTO) 7550 target = self._parse_table() 7551 7552 if target and self._match(TokenType.ALIAS, advance=False): 7553 target.set("alias", self._parse_table_alias()) 7554 7555 self._match(TokenType.USING) 7556 using = self._parse_table() 7557 7558 self._match(TokenType.ON) 7559 on = self._parse_assignment() 7560 7561 return self.expression( 7562 exp.Merge, 7563 this=target, 7564 using=using, 7565 on=on, 7566 whens=self._parse_when_matched(), 7567 returning=self._parse_returning(), 7568 ) 7569 7570 def _parse_when_matched(self) -> exp.Whens: 7571 whens = [] 7572 7573 while self._match(TokenType.WHEN): 7574 matched = not self._match(TokenType.NOT) 7575 self._match_text_seq("MATCHED") 7576 source = ( 7577 False 7578 if self._match_text_seq("BY", "TARGET") 7579 else self._match_text_seq("BY", "SOURCE") 7580 ) 7581 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7582 7583 self._match(TokenType.THEN) 7584 7585 if self._match(TokenType.INSERT): 7586 this = self._parse_star() 7587 if this: 7588 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7589 else: 7590 then = self.expression( 7591 exp.Insert, 7592 this=exp.var("ROW") 7593 if self._match_text_seq("ROW") 7594 else self._parse_value(values=False), 7595 expression=self._match_text_seq("VALUES") and self._parse_value(), 7596 ) 7597 elif self._match(TokenType.UPDATE): 7598 expressions = self._parse_star() 7599 if expressions: 7600 then = self.expression(exp.Update, expressions=expressions) 7601 else: 7602 then = self.expression( 7603 exp.Update, 7604 
expressions=self._match(TokenType.SET) 7605 and self._parse_csv(self._parse_equality), 7606 ) 7607 elif self._match(TokenType.DELETE): 7608 then = self.expression(exp.Var, this=self._prev.text) 7609 else: 7610 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7611 7612 whens.append( 7613 self.expression( 7614 exp.When, 7615 matched=matched, 7616 source=source, 7617 condition=condition, 7618 then=then, 7619 ) 7620 ) 7621 return self.expression(exp.Whens, expressions=whens) 7622 7623 def _parse_show(self) -> t.Optional[exp.Expression]: 7624 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7625 if parser: 7626 return parser(self) 7627 return self._parse_as_command(self._prev) 7628 7629 def _parse_set_item_assignment( 7630 self, kind: t.Optional[str] = None 7631 ) -> t.Optional[exp.Expression]: 7632 index = self._index 7633 7634 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7635 return self._parse_set_transaction(global_=kind == "GLOBAL") 7636 7637 left = self._parse_primary() or self._parse_column() 7638 assignment_delimiter = self._match_texts(("=", "TO")) 7639 7640 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7641 self._retreat(index) 7642 return None 7643 7644 right = self._parse_statement() or self._parse_id_var() 7645 if isinstance(right, (exp.Column, exp.Identifier)): 7646 right = exp.var(right.name) 7647 7648 this = self.expression(exp.EQ, this=left, expression=right) 7649 return self.expression(exp.SetItem, this=this, kind=kind) 7650 7651 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7652 self._match_text_seq("TRANSACTION") 7653 characteristics = self._parse_csv( 7654 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7655 ) 7656 return self.expression( 7657 exp.SetItem, 7658 expressions=characteristics, 7659 kind="TRANSACTION", 7660 **{"global": global_}, # type: ignore 7661 ) 7662 7663 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7664 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7665 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7666 7667 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7668 index = self._index 7669 set_ = self.expression( 7670 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7671 ) 7672 7673 if self._curr: 7674 self._retreat(index) 7675 return self._parse_as_command(self._prev) 7676 7677 return set_ 7678 7679 def _parse_var_from_options( 7680 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7681 ) -> t.Optional[exp.Var]: 7682 start = self._curr 7683 if not start: 7684 return None 7685 7686 option = start.text.upper() 7687 continuations = options.get(option) 7688 7689 index = self._index 7690 self._advance() 7691 for keywords in continuations or []: 7692 if isinstance(keywords, str): 7693 keywords = (keywords,) 7694 7695 if self._match_text_seq(*keywords): 7696 option = f"{option} {' '.join(keywords)}" 7697 break 7698 else: 7699 if continuations or continuations is None: 7700 if raise_unmatched: 7701 self.raise_error(f"Unknown option {option}") 7702 7703 self._retreat(index) 7704 return None 7705 7706 return exp.var(option) 7707 7708 def _parse_as_command(self, start: Token) -> exp.Command: 7709 while self._curr: 7710 self._advance() 7711 text = self._find_sql(start, self._prev) 7712 size = len(start.text) 7713 self._warn_unsupported() 7714 return exp.Command(this=text[:size], 
expression=text[size:]) 7715 7716 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7717 settings = [] 7718 7719 self._match_l_paren() 7720 kind = self._parse_id_var() 7721 7722 if self._match(TokenType.L_PAREN): 7723 while True: 7724 key = self._parse_id_var() 7725 value = self._parse_primary() 7726 if not key and value is None: 7727 break 7728 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7729 self._match(TokenType.R_PAREN) 7730 7731 self._match_r_paren() 7732 7733 return self.expression( 7734 exp.DictProperty, 7735 this=this, 7736 kind=kind.this if kind else None, 7737 settings=settings, 7738 ) 7739 7740 def _parse_dict_range(self, this: str) -> exp.DictRange: 7741 self._match_l_paren() 7742 has_min = self._match_text_seq("MIN") 7743 if has_min: 7744 min = self._parse_var() or self._parse_primary() 7745 self._match_text_seq("MAX") 7746 max = self._parse_var() or self._parse_primary() 7747 else: 7748 max = self._parse_var() or self._parse_primary() 7749 min = exp.Literal.number(0) 7750 self._match_r_paren() 7751 return self.expression(exp.DictRange, this=this, min=min, max=max) 7752 7753 def _parse_comprehension( 7754 self, this: t.Optional[exp.Expression] 7755 ) -> t.Optional[exp.Comprehension]: 7756 index = self._index 7757 expression = self._parse_column() 7758 if not self._match(TokenType.IN): 7759 self._retreat(index - 1) 7760 return None 7761 iterator = self._parse_column() 7762 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7763 return self.expression( 7764 exp.Comprehension, 7765 this=this, 7766 expression=expression, 7767 iterator=iterator, 7768 condition=condition, 7769 ) 7770 7771 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7772 if self._match(TokenType.HEREDOC_STRING): 7773 return self.expression(exp.Heredoc, this=self._prev.text) 7774 7775 if not self._match_text_seq("$"): 7776 return None 7777 7778 tags = ["$"] 7779 tag_text = None 7780 7781 if self._is_connected(): 7782 self._advance() 7783 tags.append(self._prev.text.upper()) 7784 else: 7785 self.raise_error("No closing $ found") 7786 7787 if tags[-1] != "$": 7788 if self._is_connected() and self._match_text_seq("$"): 7789 tag_text = tags[-1] 7790 tags.append("$") 7791 else: 7792 self.raise_error("No closing $ found") 7793 7794 heredoc_start = self._curr 7795 7796 while self._curr: 7797 if self._match_text_seq(*tags, advance=False): 7798 this = self._find_sql(heredoc_start, self._prev) 7799 self._advance(len(tags)) 7800 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7801 7802 self._advance() 7803 7804 self.raise_error(f"No closing {''.join(tags)} found") 7805 return None 7806 7807 def _find_parser( 7808 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7809 ) -> t.Optional[t.Callable]: 7810 if not self._curr: 7811 return None 7812 7813 index = self._index 7814 this = [] 7815 while True: 7816 # The current token might be multiple words 7817 curr = self._curr.text.upper() 7818 key = curr.split(" ") 7819 this.append(curr) 7820 7821 self._advance() 7822 result, trie = in_trie(trie, key) 7823 if result == TrieResult.FAILED: 7824 break 7825 7826 if result == TrieResult.EXISTS: 7827 subparser = parsers[" ".join(this)] 7828 return subparser 7829 7830 self._retreat(index) 7831 return None 7832 7833 def _match(self, token_type, advance=True, expression=None): 7834 if not self._curr: 7835 return None 7836 7837 if self._curr.token_type == token_type: 7838 if advance: 7839 self._advance() 7840 self._add_comments(expression) 7841 return 
True 7842 7843 return None 7844 7845 def _match_set(self, types, advance=True): 7846 if not self._curr: 7847 return None 7848 7849 if self._curr.token_type in types: 7850 if advance: 7851 self._advance() 7852 return True 7853 7854 return None 7855 7856 def _match_pair(self, token_type_a, token_type_b, advance=True): 7857 if not self._curr or not self._next: 7858 return None 7859 7860 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7861 if advance: 7862 self._advance(2) 7863 return True 7864 7865 return None 7866 7867 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7868 if not self._match(TokenType.L_PAREN, expression=expression): 7869 self.raise_error("Expecting (") 7870 7871 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7872 if not self._match(TokenType.R_PAREN, expression=expression): 7873 self.raise_error("Expecting )") 7874 7875 def _match_texts(self, texts, advance=True): 7876 if ( 7877 self._curr 7878 and self._curr.token_type != TokenType.STRING 7879 and self._curr.text.upper() in texts 7880 ): 7881 if advance: 7882 self._advance() 7883 return True 7884 return None 7885 7886 def _match_text_seq(self, *texts, advance=True): 7887 index = self._index 7888 for text in texts: 7889 if ( 7890 self._curr 7891 and self._curr.token_type != TokenType.STRING 7892 and self._curr.text.upper() == text 7893 ): 7894 self._advance() 7895 else: 7896 self._retreat(index) 7897 return None 7898 7899 if not advance: 7900 self._retreat(index) 7901 7902 return True 7903 7904 def _replace_lambda( 7905 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7906 ) -> t.Optional[exp.Expression]: 7907 if not node: 7908 return node 7909 7910 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7911 7912 for column in node.find_all(exp.Column): 7913 typ = lambda_types.get(column.parts[0].name) 7914 if typ is not None: 7915 dot_or_id = column.to_dot() if column.table else column.this 7916 7917 if typ: 7918 dot_or_id = self.expression( 7919 exp.Cast, 7920 this=dot_or_id, 7921 to=typ, 7922 ) 7923 7924 parent = column.parent 7925 7926 while isinstance(parent, exp.Dot): 7927 if not isinstance(parent.parent, exp.Dot): 7928 parent.replace(dot_or_id) 7929 break 7930 parent = parent.parent 7931 else: 7932 if column is node: 7933 node = dot_or_id 7934 else: 7935 column.replace(dot_or_id) 7936 return node 7937 7938 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7939 start = self._prev 7940 7941 # Not to be confused with TRUNCATE(number, decimals) function call 7942 if self._match(TokenType.L_PAREN): 7943 self._retreat(self._index - 2) 7944 return self._parse_function() 7945 7946 # Clickhouse supports TRUNCATE DATABASE as well 7947 is_database = self._match(TokenType.DATABASE) 7948 7949 self._match(TokenType.TABLE) 7950 7951 exists = self._parse_exists(not_=False) 7952 7953 expressions = self._parse_csv( 7954 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7955 ) 7956 7957 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7958 7959 if self._match_text_seq("RESTART", "IDENTITY"): 7960 identity = "RESTART" 7961 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7962 identity = "CONTINUE" 7963 else: 7964 identity = None 7965 7966 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7967 option = self._prev.text 7968 else: 7969 option = None 7970 7971 partition = self._parse_partition() 
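        # Illustrative only (assumed): the clauses collected above cover statements
        # such as
        #   TRUNCATE TABLE t1, t2 RESTART IDENTITY CASCADE
        # where the identity, CASCADE/RESTRICT and PARTITION parts are all optional.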
7972 7973 # Fallback case 7974 if self._curr: 7975 return self._parse_as_command(start) 7976 7977 return self.expression( 7978 exp.TruncateTable, 7979 expressions=expressions, 7980 is_database=is_database, 7981 exists=exists, 7982 cluster=cluster, 7983 identity=identity, 7984 option=option, 7985 partition=partition, 7986 ) 7987 7988 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7989 this = self._parse_ordered(self._parse_opclass) 7990 7991 if not self._match(TokenType.WITH): 7992 return this 7993 7994 op = self._parse_var(any_token=True) 7995 7996 return self.expression(exp.WithOperator, this=this, op=op) 7997 7998 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7999 self._match(TokenType.EQ) 8000 self._match(TokenType.L_PAREN) 8001 8002 opts: t.List[t.Optional[exp.Expression]] = [] 8003 option: exp.Expression | None 8004 while self._curr and not self._match(TokenType.R_PAREN): 8005 if self._match_text_seq("FORMAT_NAME", "="): 8006 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8007 option = self._parse_format_name() 8008 else: 8009 option = self._parse_property() 8010 8011 if option is None: 8012 self.raise_error("Unable to parse option") 8013 break 8014 8015 opts.append(option) 8016 8017 return opts 8018 8019 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8020 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8021 8022 options = [] 8023 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8024 option = self._parse_var(any_token=True) 8025 prev = self._prev.text.upper() 8026 8027 # Different dialects might separate options and values by white space, "=" and "AS" 8028 self._match(TokenType.EQ) 8029 self._match(TokenType.ALIAS) 8030 8031 param = self.expression(exp.CopyParameter, this=option) 8032 8033 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8034 TokenType.L_PAREN, advance=False 8035 ): 8036 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8037 param.set("expressions", self._parse_wrapped_options()) 8038 elif prev == "FILE_FORMAT": 8039 # T-SQL's external file format case 8040 param.set("expression", self._parse_field()) 8041 else: 8042 param.set("expression", self._parse_unquoted_field()) 8043 8044 options.append(param) 8045 self._match(sep) 8046 8047 return options 8048 8049 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8050 expr = self.expression(exp.Credentials) 8051 8052 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8053 expr.set("storage", self._parse_field()) 8054 if self._match_text_seq("CREDENTIALS"): 8055 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8056 creds = ( 8057 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8058 ) 8059 expr.set("credentials", creds) 8060 if self._match_text_seq("ENCRYPTION"): 8061 expr.set("encryption", self._parse_wrapped_options()) 8062 if self._match_text_seq("IAM_ROLE"): 8063 expr.set("iam_role", self._parse_field()) 8064 if self._match_text_seq("REGION"): 8065 expr.set("region", self._parse_field()) 8066 8067 return expr 8068 8069 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8070 return self._parse_field() 8071 8072 def _parse_copy(self) -> exp.Copy | exp.Command: 8073 start = self._prev 8074 8075 self._match(TokenType.INTO) 8076 8077 this = ( 8078 self._parse_select(nested=True, parse_subquery_alias=False) 8079 if self._match(TokenType.L_PAREN, advance=False) 8080 else self._parse_table(schema=True) 
8081 ) 8082 8083 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8084 8085 files = self._parse_csv(self._parse_file_location) 8086 credentials = self._parse_credentials() 8087 8088 self._match_text_seq("WITH") 8089 8090 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8091 8092 # Fallback case 8093 if self._curr: 8094 return self._parse_as_command(start) 8095 8096 return self.expression( 8097 exp.Copy, 8098 this=this, 8099 kind=kind, 8100 credentials=credentials, 8101 files=files, 8102 params=params, 8103 ) 8104 8105 def _parse_normalize(self) -> exp.Normalize: 8106 return self.expression( 8107 exp.Normalize, 8108 this=self._parse_bitwise(), 8109 form=self._match(TokenType.COMMA) and self._parse_var(), 8110 ) 8111 8112 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8113 args = self._parse_csv(lambda: self._parse_lambda()) 8114 8115 this = seq_get(args, 0) 8116 decimals = seq_get(args, 1) 8117 8118 return expr_type( 8119 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8120 ) 8121 8122 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8123 if self._match_text_seq("COLUMNS", "(", advance=False): 8124 this = self._parse_function() 8125 if isinstance(this, exp.Columns): 8126 this.set("unpack", True) 8127 return this 8128 8129 return self.expression( 8130 exp.Star, 8131 **{ # type: ignore 8132 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8133 "replace": self._parse_star_op("REPLACE"), 8134 "rename": self._parse_star_op("RENAME"), 8135 }, 8136 ) 8137 8138 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8139 privilege_parts = [] 8140 8141 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8142 # (end of privilege list) or L_PAREN (start of column list) are met 8143 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8144 privilege_parts.append(self._curr.text.upper()) 8145 self._advance() 8146 8147 this = exp.var(" ".join(privilege_parts)) 8148 expressions = ( 8149 self._parse_wrapped_csv(self._parse_column) 8150 if self._match(TokenType.L_PAREN, advance=False) 8151 else None 8152 ) 8153 8154 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8155 8156 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8157 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8158 principal = self._parse_id_var() 8159 8160 if not principal: 8161 return None 8162 8163 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8164 8165 def _parse_grant(self) -> exp.Grant | exp.Command: 8166 start = self._prev 8167 8168 privileges = self._parse_csv(self._parse_grant_privilege) 8169 8170 self._match(TokenType.ON) 8171 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8172 8173 # Attempt to parse the securable e.g. 
MySQL allows names 8174 # such as "foo.*", "*.*" which are not easily parseable yet 8175 securable = self._try_parse(self._parse_table_parts) 8176 8177 if not securable or not self._match_text_seq("TO"): 8178 return self._parse_as_command(start) 8179 8180 principals = self._parse_csv(self._parse_grant_principal) 8181 8182 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8183 8184 if self._curr: 8185 return self._parse_as_command(start) 8186 8187 return self.expression( 8188 exp.Grant, 8189 privileges=privileges, 8190 kind=kind, 8191 securable=securable, 8192 principals=principals, 8193 grant_option=grant_option, 8194 ) 8195 8196 def _parse_overlay(self) -> exp.Overlay: 8197 return self.expression( 8198 exp.Overlay, 8199 **{ # type: ignore 8200 "this": self._parse_bitwise(), 8201 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8202 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8203 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8204 }, 8205 ) 8206 8207 def _parse_format_name(self) -> exp.Property: 8208 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8209 # for FILE_FORMAT = <format_name> 8210 return self.expression( 8211 exp.Property, 8212 this=exp.var("FORMAT_NAME"), 8213 value=self._parse_string() or self._parse_table_parts(), 8214 )
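A minimal usage sketch (illustrative, not part of the module): the Parser defined above is normally driven through the top-level sqlglot API rather than instantiated directly.

    import sqlglot
    from sqlglot import exp

    # Parsing produces the expression tree built by the methods above; e.g. the
    # IGNORE NULLS handling in _parse_window is exercised by:
    tree = sqlglot.parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t")
    assert isinstance(tree, exp.Select)

    # The same tree can then be rendered back to SQL for another dialect.
    print(tree.sql(dialect="duckdb"))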
176class Parser(metaclass=_Parser): 177 """ 178 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 179 180 Args: 181 error_level: The desired error level. 182 Default: ErrorLevel.IMMEDIATE 183 error_message_context: The amount of context to capture from a query string when displaying 184 the error message (in number of characters). 185 Default: 100 186 max_errors: Maximum number of error messages to include in a raised ParseError. 187 This is only relevant if error_level is ErrorLevel.RAISE. 188 Default: 3 189 """ 190 191 FUNCTIONS: t.Dict[str, t.Callable] = { 192 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 193 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 194 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 195 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 196 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 197 ), 198 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 199 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 200 ), 201 "CHAR": lambda args: exp.Chr(expressions=args), 202 "CHR": lambda args: exp.Chr(expressions=args), 203 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 204 "CONCAT": lambda args, dialect: exp.Concat( 205 expressions=args, 206 safe=not dialect.STRICT_STRING_CONCAT, 207 coalesce=dialect.CONCAT_COALESCE, 208 ), 209 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 210 expressions=args, 211 safe=not dialect.STRICT_STRING_CONCAT, 212 coalesce=dialect.CONCAT_COALESCE, 213 ), 214 "CONVERT_TIMEZONE": build_convert_timezone, 215 "DATE_TO_DATE_STR": lambda args: exp.Cast( 216 this=seq_get(args, 0), 217 to=exp.DataType(this=exp.DataType.Type.TEXT), 218 ), 219 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 220 start=seq_get(args, 0), 221 end=seq_get(args, 1), 222 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 223 ), 224 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 225 "HEX": build_hex, 226 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 227 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 228 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 229 "LIKE": build_like, 230 "LOG": build_logarithm, 231 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 232 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 233 "LOWER": build_lower, 234 "LPAD": lambda args: build_pad(args), 235 "LEFTPAD": lambda args: build_pad(args), 236 "LTRIM": lambda args: build_trim(args), 237 "MOD": build_mod, 238 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 239 "RPAD": lambda args: build_pad(args, is_left=False), 240 "RTRIM": lambda args: build_trim(args, is_left=False), 241 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 242 if len(args) != 2 243 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 244 "STRPOS": exp.StrPosition.from_arg_list, 245 "CHARINDEX": lambda args: build_locate_strposition(args), 246 "INSTR": exp.StrPosition.from_arg_list, 247 "LOCATE": lambda args: build_locate_strposition(args), 248 "TIME_TO_TIME_STR": lambda args: exp.Cast( 249 this=seq_get(args, 0), 250 to=exp.DataType(this=exp.DataType.Type.TEXT), 251 ), 252 "TO_HEX": build_hex, 253 "TS_OR_DS_TO_DATE_STR": 
lambda args: exp.Substring( 254 this=exp.Cast( 255 this=seq_get(args, 0), 256 to=exp.DataType(this=exp.DataType.Type.TEXT), 257 ), 258 start=exp.Literal.number(1), 259 length=exp.Literal.number(10), 260 ), 261 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 262 "UPPER": build_upper, 263 "VAR_MAP": build_var_map, 264 } 265 266 NO_PAREN_FUNCTIONS = { 267 TokenType.CURRENT_DATE: exp.CurrentDate, 268 TokenType.CURRENT_DATETIME: exp.CurrentDate, 269 TokenType.CURRENT_TIME: exp.CurrentTime, 270 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 271 TokenType.CURRENT_USER: exp.CurrentUser, 272 } 273 274 STRUCT_TYPE_TOKENS = { 275 TokenType.NESTED, 276 TokenType.OBJECT, 277 TokenType.STRUCT, 278 TokenType.UNION, 279 } 280 281 NESTED_TYPE_TOKENS = { 282 TokenType.ARRAY, 283 TokenType.LIST, 284 TokenType.LOWCARDINALITY, 285 TokenType.MAP, 286 TokenType.NULLABLE, 287 TokenType.RANGE, 288 *STRUCT_TYPE_TOKENS, 289 } 290 291 ENUM_TYPE_TOKENS = { 292 TokenType.DYNAMIC, 293 TokenType.ENUM, 294 TokenType.ENUM8, 295 TokenType.ENUM16, 296 } 297 298 AGGREGATE_TYPE_TOKENS = { 299 TokenType.AGGREGATEFUNCTION, 300 TokenType.SIMPLEAGGREGATEFUNCTION, 301 } 302 303 TYPE_TOKENS = { 304 TokenType.BIT, 305 TokenType.BOOLEAN, 306 TokenType.TINYINT, 307 TokenType.UTINYINT, 308 TokenType.SMALLINT, 309 TokenType.USMALLINT, 310 TokenType.INT, 311 TokenType.UINT, 312 TokenType.BIGINT, 313 TokenType.UBIGINT, 314 TokenType.INT128, 315 TokenType.UINT128, 316 TokenType.INT256, 317 TokenType.UINT256, 318 TokenType.MEDIUMINT, 319 TokenType.UMEDIUMINT, 320 TokenType.FIXEDSTRING, 321 TokenType.FLOAT, 322 TokenType.DOUBLE, 323 TokenType.UDOUBLE, 324 TokenType.CHAR, 325 TokenType.NCHAR, 326 TokenType.VARCHAR, 327 TokenType.NVARCHAR, 328 TokenType.BPCHAR, 329 TokenType.TEXT, 330 TokenType.MEDIUMTEXT, 331 TokenType.LONGTEXT, 332 TokenType.BLOB, 333 TokenType.MEDIUMBLOB, 334 TokenType.LONGBLOB, 335 TokenType.BINARY, 336 TokenType.VARBINARY, 337 TokenType.JSON, 338 TokenType.JSONB, 339 TokenType.INTERVAL, 340 TokenType.TINYBLOB, 341 TokenType.TINYTEXT, 342 TokenType.TIME, 343 TokenType.TIMETZ, 344 TokenType.TIMESTAMP, 345 TokenType.TIMESTAMP_S, 346 TokenType.TIMESTAMP_MS, 347 TokenType.TIMESTAMP_NS, 348 TokenType.TIMESTAMPTZ, 349 TokenType.TIMESTAMPLTZ, 350 TokenType.TIMESTAMPNTZ, 351 TokenType.DATETIME, 352 TokenType.DATETIME2, 353 TokenType.DATETIME64, 354 TokenType.SMALLDATETIME, 355 TokenType.DATE, 356 TokenType.DATE32, 357 TokenType.INT4RANGE, 358 TokenType.INT4MULTIRANGE, 359 TokenType.INT8RANGE, 360 TokenType.INT8MULTIRANGE, 361 TokenType.NUMRANGE, 362 TokenType.NUMMULTIRANGE, 363 TokenType.TSRANGE, 364 TokenType.TSMULTIRANGE, 365 TokenType.TSTZRANGE, 366 TokenType.TSTZMULTIRANGE, 367 TokenType.DATERANGE, 368 TokenType.DATEMULTIRANGE, 369 TokenType.DECIMAL, 370 TokenType.DECIMAL32, 371 TokenType.DECIMAL64, 372 TokenType.DECIMAL128, 373 TokenType.DECIMAL256, 374 TokenType.UDECIMAL, 375 TokenType.BIGDECIMAL, 376 TokenType.UUID, 377 TokenType.GEOGRAPHY, 378 TokenType.GEOMETRY, 379 TokenType.POINT, 380 TokenType.RING, 381 TokenType.LINESTRING, 382 TokenType.MULTILINESTRING, 383 TokenType.POLYGON, 384 TokenType.MULTIPOLYGON, 385 TokenType.HLLSKETCH, 386 TokenType.HSTORE, 387 TokenType.PSEUDO_TYPE, 388 TokenType.SUPER, 389 TokenType.SERIAL, 390 TokenType.SMALLSERIAL, 391 TokenType.BIGSERIAL, 392 TokenType.XML, 393 TokenType.YEAR, 394 TokenType.USERDEFINED, 395 TokenType.MONEY, 396 TokenType.SMALLMONEY, 397 TokenType.ROWVERSION, 398 TokenType.IMAGE, 399 TokenType.VARIANT, 400 TokenType.VECTOR, 401 
TokenType.OBJECT, 402 TokenType.OBJECT_IDENTIFIER, 403 TokenType.INET, 404 TokenType.IPADDRESS, 405 TokenType.IPPREFIX, 406 TokenType.IPV4, 407 TokenType.IPV6, 408 TokenType.UNKNOWN, 409 TokenType.NULL, 410 TokenType.NAME, 411 TokenType.TDIGEST, 412 TokenType.DYNAMIC, 413 *ENUM_TYPE_TOKENS, 414 *NESTED_TYPE_TOKENS, 415 *AGGREGATE_TYPE_TOKENS, 416 } 417 418 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 419 TokenType.BIGINT: TokenType.UBIGINT, 420 TokenType.INT: TokenType.UINT, 421 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 422 TokenType.SMALLINT: TokenType.USMALLINT, 423 TokenType.TINYINT: TokenType.UTINYINT, 424 TokenType.DECIMAL: TokenType.UDECIMAL, 425 TokenType.DOUBLE: TokenType.UDOUBLE, 426 } 427 428 SUBQUERY_PREDICATES = { 429 TokenType.ANY: exp.Any, 430 TokenType.ALL: exp.All, 431 TokenType.EXISTS: exp.Exists, 432 TokenType.SOME: exp.Any, 433 } 434 435 RESERVED_TOKENS = { 436 *Tokenizer.SINGLE_TOKENS.values(), 437 TokenType.SELECT, 438 } - {TokenType.IDENTIFIER} 439 440 DB_CREATABLES = { 441 TokenType.DATABASE, 442 TokenType.DICTIONARY, 443 TokenType.FILE_FORMAT, 444 TokenType.MODEL, 445 TokenType.NAMESPACE, 446 TokenType.SCHEMA, 447 TokenType.SEQUENCE, 448 TokenType.SINK, 449 TokenType.SOURCE, 450 TokenType.STAGE, 451 TokenType.STORAGE_INTEGRATION, 452 TokenType.STREAMLIT, 453 TokenType.TABLE, 454 TokenType.TAG, 455 TokenType.VIEW, 456 TokenType.WAREHOUSE, 457 } 458 459 CREATABLES = { 460 TokenType.COLUMN, 461 TokenType.CONSTRAINT, 462 TokenType.FOREIGN_KEY, 463 TokenType.FUNCTION, 464 TokenType.INDEX, 465 TokenType.PROCEDURE, 466 *DB_CREATABLES, 467 } 468 469 ALTERABLES = { 470 TokenType.INDEX, 471 TokenType.TABLE, 472 TokenType.VIEW, 473 } 474 475 # Tokens that can represent identifiers 476 ID_VAR_TOKENS = { 477 TokenType.ALL, 478 TokenType.ATTACH, 479 TokenType.VAR, 480 TokenType.ANTI, 481 TokenType.APPLY, 482 TokenType.ASC, 483 TokenType.ASOF, 484 TokenType.AUTO_INCREMENT, 485 TokenType.BEGIN, 486 TokenType.BPCHAR, 487 TokenType.CACHE, 488 TokenType.CASE, 489 TokenType.COLLATE, 490 TokenType.COMMAND, 491 TokenType.COMMENT, 492 TokenType.COMMIT, 493 TokenType.CONSTRAINT, 494 TokenType.COPY, 495 TokenType.CUBE, 496 TokenType.CURRENT_SCHEMA, 497 TokenType.DEFAULT, 498 TokenType.DELETE, 499 TokenType.DESC, 500 TokenType.DESCRIBE, 501 TokenType.DETACH, 502 TokenType.DICTIONARY, 503 TokenType.DIV, 504 TokenType.END, 505 TokenType.EXECUTE, 506 TokenType.EXPORT, 507 TokenType.ESCAPE, 508 TokenType.FALSE, 509 TokenType.FIRST, 510 TokenType.FILTER, 511 TokenType.FINAL, 512 TokenType.FORMAT, 513 TokenType.FULL, 514 TokenType.IDENTIFIER, 515 TokenType.IS, 516 TokenType.ISNULL, 517 TokenType.INTERVAL, 518 TokenType.KEEP, 519 TokenType.KILL, 520 TokenType.LEFT, 521 TokenType.LIMIT, 522 TokenType.LOAD, 523 TokenType.MERGE, 524 TokenType.NATURAL, 525 TokenType.NEXT, 526 TokenType.OFFSET, 527 TokenType.OPERATOR, 528 TokenType.ORDINALITY, 529 TokenType.OVERLAPS, 530 TokenType.OVERWRITE, 531 TokenType.PARTITION, 532 TokenType.PERCENT, 533 TokenType.PIVOT, 534 TokenType.PRAGMA, 535 TokenType.PUT, 536 TokenType.RANGE, 537 TokenType.RECURSIVE, 538 TokenType.REFERENCES, 539 TokenType.REFRESH, 540 TokenType.RENAME, 541 TokenType.REPLACE, 542 TokenType.RIGHT, 543 TokenType.ROLLUP, 544 TokenType.ROW, 545 TokenType.ROWS, 546 TokenType.SEMI, 547 TokenType.SET, 548 TokenType.SETTINGS, 549 TokenType.SHOW, 550 TokenType.TEMPORARY, 551 TokenType.TOP, 552 TokenType.TRUE, 553 TokenType.TRUNCATE, 554 TokenType.UNIQUE, 555 TokenType.UNNEST, 556 TokenType.UNPIVOT, 557 TokenType.UPDATE, 558 TokenType.USE, 559 
TokenType.VOLATILE, 560 TokenType.WINDOW, 561 *CREATABLES, 562 *SUBQUERY_PREDICATES, 563 *TYPE_TOKENS, 564 *NO_PAREN_FUNCTIONS, 565 } 566 ID_VAR_TOKENS.remove(TokenType.UNION) 567 568 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 569 TokenType.ANTI, 570 TokenType.APPLY, 571 TokenType.ASOF, 572 TokenType.FULL, 573 TokenType.LEFT, 574 TokenType.LOCK, 575 TokenType.NATURAL, 576 TokenType.RIGHT, 577 TokenType.SEMI, 578 TokenType.WINDOW, 579 } 580 581 ALIAS_TOKENS = ID_VAR_TOKENS 582 583 ARRAY_CONSTRUCTORS = { 584 "ARRAY": exp.Array, 585 "LIST": exp.List, 586 } 587 588 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 589 590 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 591 592 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 593 594 FUNC_TOKENS = { 595 TokenType.COLLATE, 596 TokenType.COMMAND, 597 TokenType.CURRENT_DATE, 598 TokenType.CURRENT_DATETIME, 599 TokenType.CURRENT_SCHEMA, 600 TokenType.CURRENT_TIMESTAMP, 601 TokenType.CURRENT_TIME, 602 TokenType.CURRENT_USER, 603 TokenType.FILTER, 604 TokenType.FIRST, 605 TokenType.FORMAT, 606 TokenType.GLOB, 607 TokenType.IDENTIFIER, 608 TokenType.INDEX, 609 TokenType.ISNULL, 610 TokenType.ILIKE, 611 TokenType.INSERT, 612 TokenType.LIKE, 613 TokenType.MERGE, 614 TokenType.NEXT, 615 TokenType.OFFSET, 616 TokenType.PRIMARY_KEY, 617 TokenType.RANGE, 618 TokenType.REPLACE, 619 TokenType.RLIKE, 620 TokenType.ROW, 621 TokenType.UNNEST, 622 TokenType.VAR, 623 TokenType.LEFT, 624 TokenType.RIGHT, 625 TokenType.SEQUENCE, 626 TokenType.DATE, 627 TokenType.DATETIME, 628 TokenType.TABLE, 629 TokenType.TIMESTAMP, 630 TokenType.TIMESTAMPTZ, 631 TokenType.TRUNCATE, 632 TokenType.WINDOW, 633 TokenType.XOR, 634 *TYPE_TOKENS, 635 *SUBQUERY_PREDICATES, 636 } 637 638 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 639 TokenType.AND: exp.And, 640 } 641 642 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 643 TokenType.COLON_EQ: exp.PropertyEQ, 644 } 645 646 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 647 TokenType.OR: exp.Or, 648 } 649 650 EQUALITY = { 651 TokenType.EQ: exp.EQ, 652 TokenType.NEQ: exp.NEQ, 653 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 654 } 655 656 COMPARISON = { 657 TokenType.GT: exp.GT, 658 TokenType.GTE: exp.GTE, 659 TokenType.LT: exp.LT, 660 TokenType.LTE: exp.LTE, 661 } 662 663 BITWISE = { 664 TokenType.AMP: exp.BitwiseAnd, 665 TokenType.CARET: exp.BitwiseXor, 666 TokenType.PIPE: exp.BitwiseOr, 667 } 668 669 TERM = { 670 TokenType.DASH: exp.Sub, 671 TokenType.PLUS: exp.Add, 672 TokenType.MOD: exp.Mod, 673 TokenType.COLLATE: exp.Collate, 674 } 675 676 FACTOR = { 677 TokenType.DIV: exp.IntDiv, 678 TokenType.LR_ARROW: exp.Distance, 679 TokenType.SLASH: exp.Div, 680 TokenType.STAR: exp.Mul, 681 } 682 683 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 684 685 TIMES = { 686 TokenType.TIME, 687 TokenType.TIMETZ, 688 } 689 690 TIMESTAMPS = { 691 TokenType.TIMESTAMP, 692 TokenType.TIMESTAMPNTZ, 693 TokenType.TIMESTAMPTZ, 694 TokenType.TIMESTAMPLTZ, 695 *TIMES, 696 } 697 698 SET_OPERATIONS = { 699 TokenType.UNION, 700 TokenType.INTERSECT, 701 TokenType.EXCEPT, 702 } 703 704 JOIN_METHODS = { 705 TokenType.ASOF, 706 TokenType.NATURAL, 707 TokenType.POSITIONAL, 708 } 709 710 JOIN_SIDES = { 711 TokenType.LEFT, 712 TokenType.RIGHT, 713 TokenType.FULL, 714 } 715 716 JOIN_KINDS = { 717 TokenType.ANTI, 718 TokenType.CROSS, 719 TokenType.INNER, 720 TokenType.OUTER, 721 TokenType.SEMI, 722 TokenType.STRAIGHT_JOIN, 723 } 724 725 JOIN_HINTS: t.Set[str] = set() 726 727 LAMBDAS = { 728 TokenType.ARROW: lambda 
self, expressions: self.expression( 729 exp.Lambda, 730 this=self._replace_lambda( 731 self._parse_assignment(), 732 expressions, 733 ), 734 expressions=expressions, 735 ), 736 TokenType.FARROW: lambda self, expressions: self.expression( 737 exp.Kwarg, 738 this=exp.var(expressions[0].name), 739 expression=self._parse_assignment(), 740 ), 741 } 742 743 COLUMN_OPERATORS = { 744 TokenType.DOT: None, 745 TokenType.DOTCOLON: lambda self, this, to: self.expression( 746 exp.JSONCast, 747 this=this, 748 to=to, 749 ), 750 TokenType.DCOLON: lambda self, this, to: self.expression( 751 exp.Cast if self.STRICT_CAST else exp.TryCast, 752 this=this, 753 to=to, 754 ), 755 TokenType.ARROW: lambda self, this, path: self.expression( 756 exp.JSONExtract, 757 this=this, 758 expression=self.dialect.to_json_path(path), 759 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 760 ), 761 TokenType.DARROW: lambda self, this, path: self.expression( 762 exp.JSONExtractScalar, 763 this=this, 764 expression=self.dialect.to_json_path(path), 765 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 766 ), 767 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 768 exp.JSONBExtract, 769 this=this, 770 expression=path, 771 ), 772 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 773 exp.JSONBExtractScalar, 774 this=this, 775 expression=path, 776 ), 777 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 778 exp.JSONBContains, 779 this=this, 780 expression=key, 781 ), 782 } 783 784 EXPRESSION_PARSERS = { 785 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 786 exp.Column: lambda self: self._parse_column(), 787 exp.Condition: lambda self: self._parse_assignment(), 788 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 789 exp.Expression: lambda self: self._parse_expression(), 790 exp.From: lambda self: self._parse_from(joins=True), 791 exp.Group: lambda self: self._parse_group(), 792 exp.Having: lambda self: self._parse_having(), 793 exp.Hint: lambda self: self._parse_hint_body(), 794 exp.Identifier: lambda self: self._parse_id_var(), 795 exp.Join: lambda self: self._parse_join(), 796 exp.Lambda: lambda self: self._parse_lambda(), 797 exp.Lateral: lambda self: self._parse_lateral(), 798 exp.Limit: lambda self: self._parse_limit(), 799 exp.Offset: lambda self: self._parse_offset(), 800 exp.Order: lambda self: self._parse_order(), 801 exp.Ordered: lambda self: self._parse_ordered(), 802 exp.Properties: lambda self: self._parse_properties(), 803 exp.Qualify: lambda self: self._parse_qualify(), 804 exp.Returning: lambda self: self._parse_returning(), 805 exp.Select: lambda self: self._parse_select(), 806 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 807 exp.Table: lambda self: self._parse_table_parts(), 808 exp.TableAlias: lambda self: self._parse_table_alias(), 809 exp.Tuple: lambda self: self._parse_value(values=False), 810 exp.Whens: lambda self: self._parse_when_matched(), 811 exp.Where: lambda self: self._parse_where(), 812 exp.Window: lambda self: self._parse_named_window(), 813 exp.With: lambda self: self._parse_with(), 814 "JOIN_TYPE": lambda self: self._parse_join_parts(), 815 } 816 817 STATEMENT_PARSERS = { 818 TokenType.ALTER: lambda self: self._parse_alter(), 819 TokenType.ANALYZE: lambda self: self._parse_analyze(), 820 TokenType.BEGIN: lambda self: self._parse_transaction(), 821 TokenType.CACHE: lambda self: self._parse_cache(), 822 TokenType.COMMENT: lambda self: self._parse_comment(), 823 
TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 824 TokenType.COPY: lambda self: self._parse_copy(), 825 TokenType.CREATE: lambda self: self._parse_create(), 826 TokenType.DELETE: lambda self: self._parse_delete(), 827 TokenType.DESC: lambda self: self._parse_describe(), 828 TokenType.DESCRIBE: lambda self: self._parse_describe(), 829 TokenType.DROP: lambda self: self._parse_drop(), 830 TokenType.GRANT: lambda self: self._parse_grant(), 831 TokenType.INSERT: lambda self: self._parse_insert(), 832 TokenType.KILL: lambda self: self._parse_kill(), 833 TokenType.LOAD: lambda self: self._parse_load(), 834 TokenType.MERGE: lambda self: self._parse_merge(), 835 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 836 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 837 TokenType.REFRESH: lambda self: self._parse_refresh(), 838 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 839 TokenType.SET: lambda self: self._parse_set(), 840 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 841 TokenType.UNCACHE: lambda self: self._parse_uncache(), 842 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 843 TokenType.UPDATE: lambda self: self._parse_update(), 844 TokenType.USE: lambda self: self._parse_use(), 845 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 846 } 847 848 UNARY_PARSERS = { 849 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 850 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 851 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 852 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 853 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 854 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 855 } 856 857 STRING_PARSERS = { 858 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 859 exp.RawString, this=token.text 860 ), 861 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 862 exp.National, this=token.text 863 ), 864 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 865 TokenType.STRING: lambda self, token: self.expression( 866 exp.Literal, this=token.text, is_string=True 867 ), 868 TokenType.UNICODE_STRING: lambda self, token: self.expression( 869 exp.UnicodeString, 870 this=token.text, 871 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 872 ), 873 } 874 875 NUMERIC_PARSERS = { 876 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 877 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 878 TokenType.HEX_STRING: lambda self, token: self.expression( 879 exp.HexString, 880 this=token.text, 881 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 882 ), 883 TokenType.NUMBER: lambda self, token: self.expression( 884 exp.Literal, this=token.text, is_string=False 885 ), 886 } 887 888 PRIMARY_PARSERS = { 889 **STRING_PARSERS, 890 **NUMERIC_PARSERS, 891 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 892 TokenType.NULL: lambda self, _: self.expression(exp.Null), 893 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 894 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 895 TokenType.SESSION_PARAMETER: lambda self, _: 
self._parse_session_parameter(), 896 TokenType.STAR: lambda self, _: self._parse_star_ops(), 897 } 898 899 PLACEHOLDER_PARSERS = { 900 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 901 TokenType.PARAMETER: lambda self: self._parse_parameter(), 902 TokenType.COLON: lambda self: ( 903 self.expression(exp.Placeholder, this=self._prev.text) 904 if self._match_set(self.ID_VAR_TOKENS) 905 else None 906 ), 907 } 908 909 RANGE_PARSERS = { 910 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 911 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 912 TokenType.GLOB: binary_range_parser(exp.Glob), 913 TokenType.ILIKE: binary_range_parser(exp.ILike), 914 TokenType.IN: lambda self, this: self._parse_in(this), 915 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 916 TokenType.IS: lambda self, this: self._parse_is(this), 917 TokenType.LIKE: binary_range_parser(exp.Like), 918 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 919 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 920 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 921 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 922 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 923 } 924 925 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 926 "ALLOWED_VALUES": lambda self: self.expression( 927 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 928 ), 929 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 930 "AUTO": lambda self: self._parse_auto_property(), 931 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 932 "BACKUP": lambda self: self.expression( 933 exp.BackupProperty, this=self._parse_var(any_token=True) 934 ), 935 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 936 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 937 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 938 "CHECKSUM": lambda self: self._parse_checksum(), 939 "CLUSTER BY": lambda self: self._parse_cluster(), 940 "CLUSTERED": lambda self: self._parse_clustered_by(), 941 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 942 exp.CollateProperty, **kwargs 943 ), 944 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 945 "CONTAINS": lambda self: self._parse_contains_property(), 946 "COPY": lambda self: self._parse_copy_property(), 947 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 948 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 949 "DEFINER": lambda self: self._parse_definer(), 950 "DETERMINISTIC": lambda self: self.expression( 951 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 952 ), 953 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 954 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 955 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 956 "DISTKEY": lambda self: self._parse_distkey(), 957 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 958 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 959 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 960 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 961 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 962 "FALLBACK": lambda self, **kwargs: 
self._parse_fallback(**kwargs), 963 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 964 "FREESPACE": lambda self: self._parse_freespace(), 965 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 966 "HEAP": lambda self: self.expression(exp.HeapProperty), 967 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 968 "IMMUTABLE": lambda self: self.expression( 969 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 970 ), 971 "INHERITS": lambda self: self.expression( 972 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 973 ), 974 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 975 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 976 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 977 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 978 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 979 "LIKE": lambda self: self._parse_create_like(), 980 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 981 "LOCK": lambda self: self._parse_locking(), 982 "LOCKING": lambda self: self._parse_locking(), 983 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 984 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 985 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 986 "MODIFIES": lambda self: self._parse_modifies_property(), 987 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 988 "NO": lambda self: self._parse_no_property(), 989 "ON": lambda self: self._parse_on_property(), 990 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 991 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 992 "PARTITION": lambda self: self._parse_partitioned_of(), 993 "PARTITION BY": lambda self: self._parse_partitioned_by(), 994 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 995 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 996 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 997 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 998 "READS": lambda self: self._parse_reads_property(), 999 "REMOTE": lambda self: self._parse_remote_with_connection(), 1000 "RETURNS": lambda self: self._parse_returns(), 1001 "STRICT": lambda self: self.expression(exp.StrictProperty), 1002 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1003 "ROW": lambda self: self._parse_row(), 1004 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1005 "SAMPLE": lambda self: self.expression( 1006 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1007 ), 1008 "SECURE": lambda self: self.expression(exp.SecureProperty), 1009 "SECURITY": lambda self: self._parse_security(), 1010 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1011 "SETTINGS": lambda self: self._parse_settings_property(), 1012 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1013 "SORTKEY": lambda self: self._parse_sortkey(), 1014 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1015 "STABLE": lambda self: self.expression( 1016 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1017 ), 1018 "STORED": lambda self: self._parse_stored(), 1019 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1020 
"TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1021 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1022 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1023 "TO": lambda self: self._parse_to_table(), 1024 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1025 "TRANSFORM": lambda self: self.expression( 1026 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1027 ), 1028 "TTL": lambda self: self._parse_ttl(), 1029 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1030 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1031 "VOLATILE": lambda self: self._parse_volatile_property(), 1032 "WITH": lambda self: self._parse_with_property(), 1033 } 1034 1035 CONSTRAINT_PARSERS = { 1036 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1037 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1038 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1039 "CHARACTER SET": lambda self: self.expression( 1040 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1041 ), 1042 "CHECK": lambda self: self.expression( 1043 exp.CheckColumnConstraint, 1044 this=self._parse_wrapped(self._parse_assignment), 1045 enforced=self._match_text_seq("ENFORCED"), 1046 ), 1047 "COLLATE": lambda self: self.expression( 1048 exp.CollateColumnConstraint, 1049 this=self._parse_identifier() or self._parse_column(), 1050 ), 1051 "COMMENT": lambda self: self.expression( 1052 exp.CommentColumnConstraint, this=self._parse_string() 1053 ), 1054 "COMPRESS": lambda self: self._parse_compress(), 1055 "CLUSTERED": lambda self: self.expression( 1056 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1057 ), 1058 "NONCLUSTERED": lambda self: self.expression( 1059 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1060 ), 1061 "DEFAULT": lambda self: self.expression( 1062 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1063 ), 1064 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1065 "EPHEMERAL": lambda self: self.expression( 1066 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1067 ), 1068 "EXCLUDE": lambda self: self.expression( 1069 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1070 ), 1071 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1072 "FORMAT": lambda self: self.expression( 1073 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1074 ), 1075 "GENERATED": lambda self: self._parse_generated_as_identity(), 1076 "IDENTITY": lambda self: self._parse_auto_increment(), 1077 "INLINE": lambda self: self._parse_inline(), 1078 "LIKE": lambda self: self._parse_create_like(), 1079 "NOT": lambda self: self._parse_not_constraint(), 1080 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1081 "ON": lambda self: ( 1082 self._match(TokenType.UPDATE) 1083 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1084 ) 1085 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1086 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1087 "PERIOD": lambda self: self._parse_period_for_system_time(), 1088 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1089 "REFERENCES": lambda self: self._parse_references(match=False), 1090 "TITLE": lambda self: self.expression( 1091 
exp.TitleColumnConstraint, this=self._parse_var_or_string() 1092 ), 1093 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1094 "UNIQUE": lambda self: self._parse_unique(), 1095 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1096 "WATERMARK": lambda self: self.expression( 1097 exp.WatermarkColumnConstraint, 1098 this=self._match(TokenType.FOR) and self._parse_column(), 1099 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1100 ), 1101 "WITH": lambda self: self.expression( 1102 exp.Properties, expressions=self._parse_wrapped_properties() 1103 ), 1104 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1105 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1106 } 1107 1108 def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression: 1109 klass = ( 1110 exp.PartitionedByBucket 1111 if self._prev.text.upper() == "BUCKET" 1112 else exp.PartitionByTruncate 1113 ) 1114 1115 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1116 this, expression = seq_get(args, 0), seq_get(args, 1) 1117 1118 if isinstance(this, exp.Literal): 1119 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1120 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1121 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1122 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1123 # 1124 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1125 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1126 this, expression = expression, this 1127 1128 return self.expression(klass, this=this, expression=expression) 1129 1130 ALTER_PARSERS = { 1131 "ADD": lambda self: self._parse_alter_table_add(), 1132 "AS": lambda self: self._parse_select(), 1133 "ALTER": lambda self: self._parse_alter_table_alter(), 1134 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1135 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1136 "DROP": lambda self: self._parse_alter_table_drop(), 1137 "RENAME": lambda self: self._parse_alter_table_rename(), 1138 "SET": lambda self: self._parse_alter_table_set(), 1139 "SWAP": lambda self: self.expression( 1140 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1141 ), 1142 } 1143 1144 ALTER_ALTER_PARSERS = { 1145 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1146 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1147 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1148 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1149 } 1150 1151 SCHEMA_UNNAMED_CONSTRAINTS = { 1152 "CHECK", 1153 "EXCLUDE", 1154 "FOREIGN KEY", 1155 "LIKE", 1156 "PERIOD", 1157 "PRIMARY KEY", 1158 "UNIQUE", 1159 "WATERMARK", 1160 "BUCKET", 1161 "TRUNCATE", 1162 } 1163 1164 NO_PAREN_FUNCTION_PARSERS = { 1165 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1166 "CASE": lambda self: self._parse_case(), 1167 "CONNECT_BY_ROOT": lambda self: self.expression( 1168 exp.ConnectByRoot, this=self._parse_column() 1169 ), 1170 "IF": lambda self: self._parse_if(), 1171 } 1172 1173 INVALID_FUNC_NAME_TOKENS = { 1174 TokenType.IDENTIFIER, 1175 
TokenType.STRING, 1176 } 1177 1178 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1179 1180 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1181 1182 FUNCTION_PARSERS = { 1183 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1184 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1185 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1186 "DECODE": lambda self: self._parse_decode(), 1187 "EXTRACT": lambda self: self._parse_extract(), 1188 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1189 "GAP_FILL": lambda self: self._parse_gap_fill(), 1190 "JSON_OBJECT": lambda self: self._parse_json_object(), 1191 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1192 "JSON_TABLE": lambda self: self._parse_json_table(), 1193 "MATCH": lambda self: self._parse_match_against(), 1194 "NORMALIZE": lambda self: self._parse_normalize(), 1195 "OPENJSON": lambda self: self._parse_open_json(), 1196 "OVERLAY": lambda self: self._parse_overlay(), 1197 "POSITION": lambda self: self._parse_position(), 1198 "PREDICT": lambda self: self._parse_predict(), 1199 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1200 "STRING_AGG": lambda self: self._parse_string_agg(), 1201 "SUBSTRING": lambda self: self._parse_substring(), 1202 "TRIM": lambda self: self._parse_trim(), 1203 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1204 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1205 "XMLELEMENT": lambda self: self.expression( 1206 exp.XMLElement, 1207 this=self._match_text_seq("NAME") and self._parse_id_var(), 1208 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1209 ), 1210 "XMLTABLE": lambda self: self._parse_xml_table(), 1211 } 1212 1213 QUERY_MODIFIER_PARSERS = { 1214 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1215 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1216 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1217 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1218 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1219 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1220 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1221 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1222 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1223 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1224 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1225 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1226 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1227 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1228 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1229 TokenType.CLUSTER_BY: lambda self: ( 1230 "cluster", 1231 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1232 ), 1233 TokenType.DISTRIBUTE_BY: lambda self: ( 1234 "distribute", 1235 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1236 ), 1237 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1238 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1239 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1240 } 1241 1242 SET_PARSERS = { 1243 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 
1244 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1245 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1246 "TRANSACTION": lambda self: self._parse_set_transaction(), 1247 } 1248 1249 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1250 1251 TYPE_LITERAL_PARSERS = { 1252 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1253 } 1254 1255 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1256 1257 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1258 1259 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1260 1261 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1262 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1263 "ISOLATION": ( 1264 ("LEVEL", "REPEATABLE", "READ"), 1265 ("LEVEL", "READ", "COMMITTED"), 1266 ("LEVEL", "READ", "UNCOMITTED"), 1267 ("LEVEL", "SERIALIZABLE"), 1268 ), 1269 "READ": ("WRITE", "ONLY"), 1270 } 1271 1272 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1273 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1274 ) 1275 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1276 1277 CREATE_SEQUENCE: OPTIONS_TYPE = { 1278 "SCALE": ("EXTEND", "NOEXTEND"), 1279 "SHARD": ("EXTEND", "NOEXTEND"), 1280 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1281 **dict.fromkeys( 1282 ( 1283 "SESSION", 1284 "GLOBAL", 1285 "KEEP", 1286 "NOKEEP", 1287 "ORDER", 1288 "NOORDER", 1289 "NOCACHE", 1290 "CYCLE", 1291 "NOCYCLE", 1292 "NOMINVALUE", 1293 "NOMAXVALUE", 1294 "NOSCALE", 1295 "NOSHARD", 1296 ), 1297 tuple(), 1298 ), 1299 } 1300 1301 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1302 1303 USABLES: OPTIONS_TYPE = dict.fromkeys( 1304 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1305 ) 1306 1307 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1308 1309 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1310 "TYPE": ("EVOLUTION",), 1311 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1312 } 1313 1314 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1315 1316 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1317 1318 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1319 "NOT": ("ENFORCED",), 1320 "MATCH": ( 1321 "FULL", 1322 "PARTIAL", 1323 "SIMPLE", 1324 ), 1325 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1326 "USING": ( 1327 "BTREE", 1328 "HASH", 1329 ), 1330 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1331 } 1332 1333 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1334 1335 CLONE_KEYWORDS = {"CLONE", "COPY"} 1336 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1337 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1338 1339 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1340 1341 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1342 1343 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1344 1345 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1346 1347 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1348 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1349 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1350 1351 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1352 1353 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1354 1355 ADD_CONSTRAINT_TOKENS = { 1356 TokenType.CONSTRAINT, 1357 
TokenType.FOREIGN_KEY, 1358 TokenType.INDEX, 1359 TokenType.KEY, 1360 TokenType.PRIMARY_KEY, 1361 TokenType.UNIQUE, 1362 } 1363 1364 DISTINCT_TOKENS = {TokenType.DISTINCT} 1365 1366 NULL_TOKENS = {TokenType.NULL} 1367 1368 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1369 1370 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1371 1372 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1373 1374 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1375 1376 ODBC_DATETIME_LITERALS = { 1377 "d": exp.Date, 1378 "t": exp.Time, 1379 "ts": exp.Timestamp, 1380 } 1381 1382 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1383 1384 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1385 1386 # The style options for the DESCRIBE statement 1387 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1388 1389 # The style options for the ANALYZE statement 1390 ANALYZE_STYLES = { 1391 "BUFFER_USAGE_LIMIT", 1392 "FULL", 1393 "LOCAL", 1394 "NO_WRITE_TO_BINLOG", 1395 "SAMPLE", 1396 "SKIP_LOCKED", 1397 "VERBOSE", 1398 } 1399 1400 ANALYZE_EXPRESSION_PARSERS = { 1401 "ALL": lambda self: self._parse_analyze_columns(), 1402 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1403 "DELETE": lambda self: self._parse_analyze_delete(), 1404 "DROP": lambda self: self._parse_analyze_histogram(), 1405 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1406 "LIST": lambda self: self._parse_analyze_list(), 1407 "PREDICATE": lambda self: self._parse_analyze_columns(), 1408 "UPDATE": lambda self: self._parse_analyze_histogram(), 1409 "VALIDATE": lambda self: self._parse_analyze_validate(), 1410 } 1411 1412 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1413 1414 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1415 1416 OPERATION_MODIFIERS: t.Set[str] = set() 1417 1418 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1419 1420 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1421 1422 STRICT_CAST = True 1423 1424 PREFIXED_PIVOT_COLUMNS = False 1425 IDENTIFY_PIVOT_STRINGS = False 1426 1427 LOG_DEFAULTS_TO_LN = False 1428 1429 # Whether ADD is present for each column added by ALTER TABLE 1430 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1431 1432 # Whether the table sample clause expects CSV syntax 1433 TABLESAMPLE_CSV = False 1434 1435 # The default method used for table sampling 1436 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1437 1438 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1439 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1440 1441 # Whether the TRIM function expects the characters to trim as its first argument 1442 TRIM_PATTERN_FIRST = False 1443 1444 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1445 STRING_ALIASES = False 1446 1447 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1448 MODIFIERS_ATTACHED_TO_SET_OP = True 1449 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1450 1451 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1452 NO_PAREN_IF_COMMANDS = True 1453 1454 # Whether the -> and ->> operators expect documents of type JSON (e.g. 
Postgres) 1455 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1456 1457 # Whether the `:` operator is used to extract a value from a VARIANT column 1458 COLON_IS_VARIANT_EXTRACT = False 1459 1460 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1461 # If this is True and '(' is not found, the keyword will be treated as an identifier 1462 VALUES_FOLLOWED_BY_PAREN = True 1463 1464 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1465 SUPPORTS_IMPLICIT_UNNEST = False 1466 1467 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1468 INTERVAL_SPANS = True 1469 1470 # Whether a PARTITION clause can follow a table reference 1471 SUPPORTS_PARTITION_SELECTION = False 1472 1473 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1474 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1475 1476 # Whether the 'AS' keyword is optional in the CTE definition syntax 1477 OPTIONAL_ALIAS_TOKEN_CTE = True 1478 1479 __slots__ = ( 1480 "error_level", 1481 "error_message_context", 1482 "max_errors", 1483 "dialect", 1484 "sql", 1485 "errors", 1486 "_tokens", 1487 "_index", 1488 "_curr", 1489 "_next", 1490 "_prev", 1491 "_prev_comments", 1492 ) 1493 1494 # Autofilled 1495 SHOW_TRIE: t.Dict = {} 1496 SET_TRIE: t.Dict = {} 1497 1498 def __init__( 1499 self, 1500 error_level: t.Optional[ErrorLevel] = None, 1501 error_message_context: int = 100, 1502 max_errors: int = 3, 1503 dialect: DialectType = None, 1504 ): 1505 from sqlglot.dialects import Dialect 1506 1507 self.error_level = error_level or ErrorLevel.IMMEDIATE 1508 self.error_message_context = error_message_context 1509 self.max_errors = max_errors 1510 self.dialect = Dialect.get_or_raise(dialect) 1511 self.reset() 1512 1513 def reset(self): 1514 self.sql = "" 1515 self.errors = [] 1516 self._tokens = [] 1517 self._index = 0 1518 self._curr = None 1519 self._next = None 1520 self._prev = None 1521 self._prev_comments = None 1522 1523 def parse( 1524 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1525 ) -> t.List[t.Optional[exp.Expression]]: 1526 """ 1527 Parses a list of tokens and returns a list of syntax trees, one tree 1528 per parsed SQL statement. 1529 1530 Args: 1531 raw_tokens: The list of tokens. 1532 sql: The original SQL string, used to produce helpful debug messages. 1533 1534 Returns: 1535 The list of the produced syntax trees. 1536 """ 1537 return self._parse( 1538 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1539 ) 1540 1541 def parse_into( 1542 self, 1543 expression_types: exp.IntoType, 1544 raw_tokens: t.List[Token], 1545 sql: t.Optional[str] = None, 1546 ) -> t.List[t.Optional[exp.Expression]]: 1547 """ 1548 Parses a list of tokens into a given Expression type. If a collection of Expression 1549 types is given instead, this method will try to parse the token list into each one 1550 of them, stopping at the first for which the parsing succeeds. 1551 1552 Args: 1553 expression_types: The expression type(s) to try and parse the token list into. 1554 raw_tokens: The list of tokens. 1555 sql: The original SQL string, used to produce helpful debug messages. 1556 1557 Returns: 1558 The target Expression. 
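
        Example:
            A minimal sketch, assuming `parser` is a `Parser` instance and `tokens` were
            produced by the matching dialect's tokenizer for the text "SELECT 1":

                parser.parse_into(exp.Select, tokens, sql="SELECT 1")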
1559 """ 1560 errors = [] 1561 for expression_type in ensure_list(expression_types): 1562 parser = self.EXPRESSION_PARSERS.get(expression_type) 1563 if not parser: 1564 raise TypeError(f"No parser registered for {expression_type}") 1565 1566 try: 1567 return self._parse(parser, raw_tokens, sql) 1568 except ParseError as e: 1569 e.errors[0]["into_expression"] = expression_type 1570 errors.append(e) 1571 1572 raise ParseError( 1573 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1574 errors=merge_errors(errors), 1575 ) from errors[-1] 1576 1577 def _parse( 1578 self, 1579 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1580 raw_tokens: t.List[Token], 1581 sql: t.Optional[str] = None, 1582 ) -> t.List[t.Optional[exp.Expression]]: 1583 self.reset() 1584 self.sql = sql or "" 1585 1586 total = len(raw_tokens) 1587 chunks: t.List[t.List[Token]] = [[]] 1588 1589 for i, token in enumerate(raw_tokens): 1590 if token.token_type == TokenType.SEMICOLON: 1591 if token.comments: 1592 chunks.append([token]) 1593 1594 if i < total - 1: 1595 chunks.append([]) 1596 else: 1597 chunks[-1].append(token) 1598 1599 expressions = [] 1600 1601 for tokens in chunks: 1602 self._index = -1 1603 self._tokens = tokens 1604 self._advance() 1605 1606 expressions.append(parse_method(self)) 1607 1608 if self._index < len(self._tokens): 1609 self.raise_error("Invalid expression / Unexpected token") 1610 1611 self.check_errors() 1612 1613 return expressions 1614 1615 def check_errors(self) -> None: 1616 """Logs or raises any found errors, depending on the chosen error level setting.""" 1617 if self.error_level == ErrorLevel.WARN: 1618 for error in self.errors: 1619 logger.error(str(error)) 1620 elif self.error_level == ErrorLevel.RAISE and self.errors: 1621 raise ParseError( 1622 concat_messages(self.errors, self.max_errors), 1623 errors=merge_errors(self.errors), 1624 ) 1625 1626 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1627 """ 1628 Appends an error in the list of recorded errors or raises it, depending on the chosen 1629 error level setting. 1630 """ 1631 token = token or self._curr or self._prev or Token.string("") 1632 start = token.start 1633 end = token.end + 1 1634 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1635 highlight = self.sql[start:end] 1636 end_context = self.sql[end : end + self.error_message_context] 1637 1638 error = ParseError.new( 1639 f"{message}. Line {token.line}, Col: {token.col}.\n" 1640 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1641 description=message, 1642 line=token.line, 1643 col=token.col, 1644 start_context=start_context, 1645 highlight=highlight, 1646 end_context=end_context, 1647 ) 1648 1649 if self.error_level == ErrorLevel.IMMEDIATE: 1650 raise error 1651 1652 self.errors.append(error) 1653 1654 def expression( 1655 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1656 ) -> E: 1657 """ 1658 Creates a new, validated Expression. 1659 1660 Args: 1661 exp_class: The expression class to instantiate. 1662 comments: An optional list of comments to attach to the expression. 1663 kwargs: The arguments to set for the expression along with their respective values. 1664 1665 Returns: 1666 The target expression. 
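
        Example:
            A minimal sketch; any comments buffered from the previous token are attached
            automatically and the node's mandatory arguments are validated:

                column = self.expression(exp.Column, this=exp.to_identifier("x"))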
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/except internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
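    # A minimal usage sketch, assuming the standard Dialect helpers: the dialect tokenizes
    # the SQL, `parse` splits the token stream on semicolons, and each chunk is dispatched
    # through STATEMENT_PARSERS by _parse_statement.
    #
    #     from sqlglot import exp
    #     from sqlglot.dialects import Dialect
    #
    #     dialect = Dialect.get_or_raise("duckdb")
    #     sql = "CREATE TABLE t (x INT); SELECT x FROM t"
    #     statements = dialect.parser().parse(dialect.tokenize(sql), sql=sql)
    #     assert isinstance(statements[0], exp.Create) and isinstance(statements[1], exp.Select)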
1849 1850 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1851 start = self._prev 1852 temporary = self._match(TokenType.TEMPORARY) 1853 materialized = self._match_text_seq("MATERIALIZED") 1854 1855 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1856 if not kind: 1857 return self._parse_as_command(start) 1858 1859 concurrently = self._match_text_seq("CONCURRENTLY") 1860 if_exists = exists or self._parse_exists() 1861 1862 if kind == "COLUMN": 1863 this = self._parse_column() 1864 else: 1865 this = self._parse_table_parts( 1866 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1867 ) 1868 1869 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1870 1871 if self._match(TokenType.L_PAREN, advance=False): 1872 expressions = self._parse_wrapped_csv(self._parse_types) 1873 else: 1874 expressions = None 1875 1876 return self.expression( 1877 exp.Drop, 1878 exists=if_exists, 1879 this=this, 1880 expressions=expressions, 1881 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1882 temporary=temporary, 1883 materialized=materialized, 1884 cascade=self._match_text_seq("CASCADE"), 1885 constraints=self._match_text_seq("CONSTRAINTS"), 1886 purge=self._match_text_seq("PURGE"), 1887 cluster=cluster, 1888 concurrently=concurrently, 1889 ) 1890 1891 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1892 return ( 1893 self._match_text_seq("IF") 1894 and (not not_ or self._match(TokenType.NOT)) 1895 and self._match(TokenType.EXISTS) 1896 ) 1897 1898 def _parse_create(self) -> exp.Create | exp.Command: 1899 # Note: this can't be None because we've matched a statement parser 1900 start = self._prev 1901 1902 replace = ( 1903 start.token_type == TokenType.REPLACE 1904 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1905 or self._match_pair(TokenType.OR, TokenType.ALTER) 1906 ) 1907 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1908 1909 unique = self._match(TokenType.UNIQUE) 1910 1911 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1912 clustered = True 1913 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1914 "COLUMNSTORE" 1915 ): 1916 clustered = False 1917 else: 1918 clustered = None 1919 1920 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1921 self._advance() 1922 1923 properties = None 1924 create_token = self._match_set(self.CREATABLES) and self._prev 1925 1926 if not create_token: 1927 # exp.Properties.Location.POST_CREATE 1928 properties = self._parse_properties() 1929 create_token = self._match_set(self.CREATABLES) and self._prev 1930 1931 if not properties or not create_token: 1932 return self._parse_as_command(start) 1933 1934 concurrently = self._match_text_seq("CONCURRENTLY") 1935 exists = self._parse_exists(not_=True) 1936 this = None 1937 expression: t.Optional[exp.Expression] = None 1938 indexes = None 1939 no_schema_binding = None 1940 begin = None 1941 end = None 1942 clone = None 1943 1944 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1945 nonlocal properties 1946 if properties and temp_props: 1947 properties.expressions.extend(temp_props.expressions) 1948 elif temp_props: 1949 properties = temp_props 1950 1951 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1952 this = self._parse_user_defined_function(kind=create_token.token_type) 1953 1954 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1955 
extend_props(self._parse_properties()) 1956 1957 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1958 extend_props(self._parse_properties()) 1959 1960 if not expression: 1961 if self._match(TokenType.COMMAND): 1962 expression = self._parse_as_command(self._prev) 1963 else: 1964 begin = self._match(TokenType.BEGIN) 1965 return_ = self._match_text_seq("RETURN") 1966 1967 if self._match(TokenType.STRING, advance=False): 1968 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1969 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1970 expression = self._parse_string() 1971 extend_props(self._parse_properties()) 1972 else: 1973 expression = self._parse_user_defined_function_expression() 1974 1975 end = self._match_text_seq("END") 1976 1977 if return_: 1978 expression = self.expression(exp.Return, this=expression) 1979 elif create_token.token_type == TokenType.INDEX: 1980 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1981 if not self._match(TokenType.ON): 1982 index = self._parse_id_var() 1983 anonymous = False 1984 else: 1985 index = None 1986 anonymous = True 1987 1988 this = self._parse_index(index=index, anonymous=anonymous) 1989 elif create_token.token_type in self.DB_CREATABLES: 1990 table_parts = self._parse_table_parts( 1991 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1992 ) 1993 1994 # exp.Properties.Location.POST_NAME 1995 self._match(TokenType.COMMA) 1996 extend_props(self._parse_properties(before=True)) 1997 1998 this = self._parse_schema(this=table_parts) 1999 2000 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2001 extend_props(self._parse_properties()) 2002 2003 self._match(TokenType.ALIAS) 2004 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2005 # exp.Properties.Location.POST_ALIAS 2006 extend_props(self._parse_properties()) 2007 2008 if create_token.token_type == TokenType.SEQUENCE: 2009 expression = self._parse_types() 2010 extend_props(self._parse_properties()) 2011 else: 2012 expression = self._parse_ddl_select() 2013 2014 if create_token.token_type == TokenType.TABLE: 2015 # exp.Properties.Location.POST_EXPRESSION 2016 extend_props(self._parse_properties()) 2017 2018 indexes = [] 2019 while True: 2020 index = self._parse_index() 2021 2022 # exp.Properties.Location.POST_INDEX 2023 extend_props(self._parse_properties()) 2024 if not index: 2025 break 2026 else: 2027 self._match(TokenType.COMMA) 2028 indexes.append(index) 2029 elif create_token.token_type == TokenType.VIEW: 2030 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2031 no_schema_binding = True 2032 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2033 extend_props(self._parse_properties()) 2034 2035 shallow = self._match_text_seq("SHALLOW") 2036 2037 if self._match_texts(self.CLONE_KEYWORDS): 2038 copy = self._prev.text.lower() == "copy" 2039 clone = self.expression( 2040 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2041 ) 2042 2043 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2044 return self._parse_as_command(start) 2045 2046 create_kind_text = create_token.text.upper() 2047 return self.expression( 2048 exp.Create, 2049 this=this, 2050 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2051 replace=replace, 2052 refresh=refresh, 2053 unique=unique, 2054 expression=expression, 
2055 exists=exists, 2056 properties=properties, 2057 indexes=indexes, 2058 no_schema_binding=no_schema_binding, 2059 begin=begin, 2060 end=end, 2061 clone=clone, 2062 concurrently=concurrently, 2063 clustered=clustered, 2064 ) 2065 2066 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2067 seq = exp.SequenceProperties() 2068 2069 options = [] 2070 index = self._index 2071 2072 while self._curr: 2073 self._match(TokenType.COMMA) 2074 if self._match_text_seq("INCREMENT"): 2075 self._match_text_seq("BY") 2076 self._match_text_seq("=") 2077 seq.set("increment", self._parse_term()) 2078 elif self._match_text_seq("MINVALUE"): 2079 seq.set("minvalue", self._parse_term()) 2080 elif self._match_text_seq("MAXVALUE"): 2081 seq.set("maxvalue", self._parse_term()) 2082 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2083 self._match_text_seq("=") 2084 seq.set("start", self._parse_term()) 2085 elif self._match_text_seq("CACHE"): 2086 # T-SQL allows empty CACHE which is initialized dynamically 2087 seq.set("cache", self._parse_number() or True) 2088 elif self._match_text_seq("OWNED", "BY"): 2089 # "OWNED BY NONE" is the default 2090 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2091 else: 2092 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2093 if opt: 2094 options.append(opt) 2095 else: 2096 break 2097 2098 seq.set("options", options if options else None) 2099 return None if self._index == index else seq 2100 2101 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2102 # only used for teradata currently 2103 self._match(TokenType.COMMA) 2104 2105 kwargs = { 2106 "no": self._match_text_seq("NO"), 2107 "dual": self._match_text_seq("DUAL"), 2108 "before": self._match_text_seq("BEFORE"), 2109 "default": self._match_text_seq("DEFAULT"), 2110 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2111 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2112 "after": self._match_text_seq("AFTER"), 2113 "minimum": self._match_texts(("MIN", "MINIMUM")), 2114 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2115 } 2116 2117 if self._match_texts(self.PROPERTY_PARSERS): 2118 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2119 try: 2120 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2121 except TypeError: 2122 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2123 2124 return None 2125 2126 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2127 return self._parse_wrapped_csv(self._parse_property) 2128 2129 def _parse_property(self) -> t.Optional[exp.Expression]: 2130 if self._match_texts(self.PROPERTY_PARSERS): 2131 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2132 2133 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2134 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2135 2136 if self._match_text_seq("COMPOUND", "SORTKEY"): 2137 return self._parse_sortkey(compound=True) 2138 2139 if self._match_text_seq("SQL", "SECURITY"): 2140 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2141 2142 index = self._index 2143 key = self._parse_column() 2144 2145 if not self._match(TokenType.EQ): 2146 self._retreat(index) 2147 return self._parse_sequence_properties() 2148 2149 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2150 if isinstance(key, exp.Column): 2151 key = 
key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2152 2153 value = self._parse_bitwise() or self._parse_var(any_token=True) 2154 2155 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2156 if isinstance(value, exp.Column): 2157 value = exp.var(value.name) 2158 2159 return self.expression(exp.Property, this=key, value=value) 2160 2161 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2162 if self._match_text_seq("BY"): 2163 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2164 2165 self._match(TokenType.ALIAS) 2166 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2167 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2168 2169 return self.expression( 2170 exp.FileFormatProperty, 2171 this=( 2172 self.expression( 2173 exp.InputOutputFormat, 2174 input_format=input_format, 2175 output_format=output_format, 2176 ) 2177 if input_format or output_format 2178 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2179 ), 2180 ) 2181 2182 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2183 field = self._parse_field() 2184 if isinstance(field, exp.Identifier) and not field.quoted: 2185 field = exp.var(field) 2186 2187 return field 2188 2189 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2190 self._match(TokenType.EQ) 2191 self._match(TokenType.ALIAS) 2192 2193 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2194 2195 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2196 properties = [] 2197 while True: 2198 if before: 2199 prop = self._parse_property_before() 2200 else: 2201 prop = self._parse_property() 2202 if not prop: 2203 break 2204 for p in ensure_list(prop): 2205 properties.append(p) 2206 2207 if properties: 2208 return self.expression(exp.Properties, expressions=properties) 2209 2210 return None 2211 2212 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2213 return self.expression( 2214 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2215 ) 2216 2217 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2218 if self._match_texts(("DEFINER", "INVOKER")): 2219 security_specifier = self._prev.text.upper() 2220 return self.expression(exp.SecurityProperty, this=security_specifier) 2221 return None 2222 2223 def _parse_settings_property(self) -> exp.SettingsProperty: 2224 return self.expression( 2225 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2226 ) 2227 2228 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2229 if self._index >= 2: 2230 pre_volatile_token = self._tokens[self._index - 2] 2231 else: 2232 pre_volatile_token = None 2233 2234 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2235 return exp.VolatileProperty() 2236 2237 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2238 2239 def _parse_retention_period(self) -> exp.Var: 2240 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2241 number = self._parse_number() 2242 number_str = f"{number} " if number else "" 2243 unit = self._parse_var(any_token=True) 2244 return exp.var(f"{number_str}{unit}") 2245 2246 def _parse_system_versioning_property( 2247 self, with_: bool = False 2248 ) 
-> exp.WithSystemVersioningProperty: 2249 self._match(TokenType.EQ) 2250 prop = self.expression( 2251 exp.WithSystemVersioningProperty, 2252 **{ # type: ignore 2253 "on": True, 2254 "with": with_, 2255 }, 2256 ) 2257 2258 if self._match_text_seq("OFF"): 2259 prop.set("on", False) 2260 return prop 2261 2262 self._match(TokenType.ON) 2263 if self._match(TokenType.L_PAREN): 2264 while self._curr and not self._match(TokenType.R_PAREN): 2265 if self._match_text_seq("HISTORY_TABLE", "="): 2266 prop.set("this", self._parse_table_parts()) 2267 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2268 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2269 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2270 prop.set("retention_period", self._parse_retention_period()) 2271 2272 self._match(TokenType.COMMA) 2273 2274 return prop 2275 2276 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2277 self._match(TokenType.EQ) 2278 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2279 prop = self.expression(exp.DataDeletionProperty, on=on) 2280 2281 if self._match(TokenType.L_PAREN): 2282 while self._curr and not self._match(TokenType.R_PAREN): 2283 if self._match_text_seq("FILTER_COLUMN", "="): 2284 prop.set("filter_column", self._parse_column()) 2285 elif self._match_text_seq("RETENTION_PERIOD", "="): 2286 prop.set("retention_period", self._parse_retention_period()) 2287 2288 self._match(TokenType.COMMA) 2289 2290 return prop 2291 2292 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2293 kind = "HASH" 2294 expressions: t.Optional[t.List[exp.Expression]] = None 2295 if self._match_text_seq("BY", "HASH"): 2296 expressions = self._parse_wrapped_csv(self._parse_id_var) 2297 elif self._match_text_seq("BY", "RANDOM"): 2298 kind = "RANDOM" 2299 2300 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2301 buckets: t.Optional[exp.Expression] = None 2302 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2303 buckets = self._parse_number() 2304 2305 return self.expression( 2306 exp.DistributedByProperty, 2307 expressions=expressions, 2308 kind=kind, 2309 buckets=buckets, 2310 order=self._parse_order(), 2311 ) 2312 2313 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2314 self._match_text_seq("KEY") 2315 expressions = self._parse_wrapped_id_vars() 2316 return self.expression(expr_type, expressions=expressions) 2317 2318 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2319 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2320 prop = self._parse_system_versioning_property(with_=True) 2321 self._match_r_paren() 2322 return prop 2323 2324 if self._match(TokenType.L_PAREN, advance=False): 2325 return self._parse_wrapped_properties() 2326 2327 if self._match_text_seq("JOURNAL"): 2328 return self._parse_withjournaltable() 2329 2330 if self._match_texts(self.VIEW_ATTRIBUTES): 2331 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2332 2333 if self._match_text_seq("DATA"): 2334 return self._parse_withdata(no=False) 2335 elif self._match_text_seq("NO", "DATA"): 2336 return self._parse_withdata(no=True) 2337 2338 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2339 return self._parse_serde_properties(with_=True) 2340 2341 if self._match(TokenType.SCHEMA): 2342 return self.expression( 2343 exp.WithSchemaBindingProperty, 2344 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 
2345 ) 2346 2347 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2348 return self.expression( 2349 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2350 ) 2351 2352 if not self._next: 2353 return None 2354 2355 return self._parse_withisolatedloading() 2356 2357 def _parse_procedure_option(self) -> exp.Expression | None: 2358 if self._match_text_seq("EXECUTE", "AS"): 2359 return self.expression( 2360 exp.ExecuteAsProperty, 2361 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2362 or self._parse_string(), 2363 ) 2364 2365 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2366 2367 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2368 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2369 self._match(TokenType.EQ) 2370 2371 user = self._parse_id_var() 2372 self._match(TokenType.PARAMETER) 2373 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2374 2375 if not user or not host: 2376 return None 2377 2378 return exp.DefinerProperty(this=f"{user}@{host}") 2379 2380 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2381 self._match(TokenType.TABLE) 2382 self._match(TokenType.EQ) 2383 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2384 2385 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2386 return self.expression(exp.LogProperty, no=no) 2387 2388 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2389 return self.expression(exp.JournalProperty, **kwargs) 2390 2391 def _parse_checksum(self) -> exp.ChecksumProperty: 2392 self._match(TokenType.EQ) 2393 2394 on = None 2395 if self._match(TokenType.ON): 2396 on = True 2397 elif self._match_text_seq("OFF"): 2398 on = False 2399 2400 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2401 2402 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2403 return self.expression( 2404 exp.Cluster, 2405 expressions=( 2406 self._parse_wrapped_csv(self._parse_ordered) 2407 if wrapped 2408 else self._parse_csv(self._parse_ordered) 2409 ), 2410 ) 2411 2412 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2413 self._match_text_seq("BY") 2414 2415 self._match_l_paren() 2416 expressions = self._parse_csv(self._parse_column) 2417 self._match_r_paren() 2418 2419 if self._match_text_seq("SORTED", "BY"): 2420 self._match_l_paren() 2421 sorted_by = self._parse_csv(self._parse_ordered) 2422 self._match_r_paren() 2423 else: 2424 sorted_by = None 2425 2426 self._match(TokenType.INTO) 2427 buckets = self._parse_number() 2428 self._match_text_seq("BUCKETS") 2429 2430 return self.expression( 2431 exp.ClusteredByProperty, 2432 expressions=expressions, 2433 sorted_by=sorted_by, 2434 buckets=buckets, 2435 ) 2436 2437 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2438 if not self._match_text_seq("GRANTS"): 2439 self._retreat(self._index - 1) 2440 return None 2441 2442 return self.expression(exp.CopyGrantsProperty) 2443 2444 def _parse_freespace(self) -> exp.FreespaceProperty: 2445 self._match(TokenType.EQ) 2446 return self.expression( 2447 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2448 ) 2449 2450 def _parse_mergeblockratio( 2451 self, no: bool = False, default: bool = False 2452 ) -> exp.MergeBlockRatioProperty: 2453 if self._match(TokenType.EQ): 2454 return self.expression( 2455 exp.MergeBlockRatioProperty, 2456 
this=self._parse_number(), 2457 percent=self._match(TokenType.PERCENT), 2458 ) 2459 2460 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2461 2462 def _parse_datablocksize( 2463 self, 2464 default: t.Optional[bool] = None, 2465 minimum: t.Optional[bool] = None, 2466 maximum: t.Optional[bool] = None, 2467 ) -> exp.DataBlocksizeProperty: 2468 self._match(TokenType.EQ) 2469 size = self._parse_number() 2470 2471 units = None 2472 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2473 units = self._prev.text 2474 2475 return self.expression( 2476 exp.DataBlocksizeProperty, 2477 size=size, 2478 units=units, 2479 default=default, 2480 minimum=minimum, 2481 maximum=maximum, 2482 ) 2483 2484 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2485 self._match(TokenType.EQ) 2486 always = self._match_text_seq("ALWAYS") 2487 manual = self._match_text_seq("MANUAL") 2488 never = self._match_text_seq("NEVER") 2489 default = self._match_text_seq("DEFAULT") 2490 2491 autotemp = None 2492 if self._match_text_seq("AUTOTEMP"): 2493 autotemp = self._parse_schema() 2494 2495 return self.expression( 2496 exp.BlockCompressionProperty, 2497 always=always, 2498 manual=manual, 2499 never=never, 2500 default=default, 2501 autotemp=autotemp, 2502 ) 2503 2504 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2505 index = self._index 2506 no = self._match_text_seq("NO") 2507 concurrent = self._match_text_seq("CONCURRENT") 2508 2509 if not self._match_text_seq("ISOLATED", "LOADING"): 2510 self._retreat(index) 2511 return None 2512 2513 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2514 return self.expression( 2515 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2516 ) 2517 2518 def _parse_locking(self) -> exp.LockingProperty: 2519 if self._match(TokenType.TABLE): 2520 kind = "TABLE" 2521 elif self._match(TokenType.VIEW): 2522 kind = "VIEW" 2523 elif self._match(TokenType.ROW): 2524 kind = "ROW" 2525 elif self._match_text_seq("DATABASE"): 2526 kind = "DATABASE" 2527 else: 2528 kind = None 2529 2530 if kind in ("DATABASE", "TABLE", "VIEW"): 2531 this = self._parse_table_parts() 2532 else: 2533 this = None 2534 2535 if self._match(TokenType.FOR): 2536 for_or_in = "FOR" 2537 elif self._match(TokenType.IN): 2538 for_or_in = "IN" 2539 else: 2540 for_or_in = None 2541 2542 if self._match_text_seq("ACCESS"): 2543 lock_type = "ACCESS" 2544 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2545 lock_type = "EXCLUSIVE" 2546 elif self._match_text_seq("SHARE"): 2547 lock_type = "SHARE" 2548 elif self._match_text_seq("READ"): 2549 lock_type = "READ" 2550 elif self._match_text_seq("WRITE"): 2551 lock_type = "WRITE" 2552 elif self._match_text_seq("CHECKSUM"): 2553 lock_type = "CHECKSUM" 2554 else: 2555 lock_type = None 2556 2557 override = self._match_text_seq("OVERRIDE") 2558 2559 return self.expression( 2560 exp.LockingProperty, 2561 this=this, 2562 kind=kind, 2563 for_or_in=for_or_in, 2564 lock_type=lock_type, 2565 override=override, 2566 ) 2567 2568 def _parse_partition_by(self) -> t.List[exp.Expression]: 2569 if self._match(TokenType.PARTITION_BY): 2570 return self._parse_csv(self._parse_assignment) 2571 return [] 2572 2573 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2574 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2575 if self._match_text_seq("MINVALUE"): 2576 return exp.var("MINVALUE") 2577 if self._match_text_seq("MAXVALUE"): 2578 return 
exp.var("MAXVALUE") 2579 return self._parse_bitwise() 2580 2581 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2582 expression = None 2583 from_expressions = None 2584 to_expressions = None 2585 2586 if self._match(TokenType.IN): 2587 this = self._parse_wrapped_csv(self._parse_bitwise) 2588 elif self._match(TokenType.FROM): 2589 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2590 self._match_text_seq("TO") 2591 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2592 elif self._match_text_seq("WITH", "(", "MODULUS"): 2593 this = self._parse_number() 2594 self._match_text_seq(",", "REMAINDER") 2595 expression = self._parse_number() 2596 self._match_r_paren() 2597 else: 2598 self.raise_error("Failed to parse partition bound spec.") 2599 2600 return self.expression( 2601 exp.PartitionBoundSpec, 2602 this=this, 2603 expression=expression, 2604 from_expressions=from_expressions, 2605 to_expressions=to_expressions, 2606 ) 2607 2608 # https://www.postgresql.org/docs/current/sql-createtable.html 2609 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2610 if not self._match_text_seq("OF"): 2611 self._retreat(self._index - 1) 2612 return None 2613 2614 this = self._parse_table(schema=True) 2615 2616 if self._match(TokenType.DEFAULT): 2617 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2618 elif self._match_text_seq("FOR", "VALUES"): 2619 expression = self._parse_partition_bound_spec() 2620 else: 2621 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2622 2623 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2624 2625 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2626 self._match(TokenType.EQ) 2627 return self.expression( 2628 exp.PartitionedByProperty, 2629 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2630 ) 2631 2632 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2633 if self._match_text_seq("AND", "STATISTICS"): 2634 statistics = True 2635 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2636 statistics = False 2637 else: 2638 statistics = None 2639 2640 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2641 2642 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2643 if self._match_text_seq("SQL"): 2644 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2645 return None 2646 2647 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2648 if self._match_text_seq("SQL", "DATA"): 2649 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2650 return None 2651 2652 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2653 if self._match_text_seq("PRIMARY", "INDEX"): 2654 return exp.NoPrimaryIndexProperty() 2655 if self._match_text_seq("SQL"): 2656 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2657 return None 2658 2659 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2660 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2661 return exp.OnCommitProperty() 2662 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2663 return exp.OnCommitProperty(delete=True) 2664 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2665 2666 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2667 if self._match_text_seq("SQL", "DATA"): 2668 return self.expression(exp.SqlReadWriteProperty, 
this="READS SQL DATA") 2669 return None 2670 2671 def _parse_distkey(self) -> exp.DistKeyProperty: 2672 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2673 2674 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2675 table = self._parse_table(schema=True) 2676 2677 options = [] 2678 while self._match_texts(("INCLUDING", "EXCLUDING")): 2679 this = self._prev.text.upper() 2680 2681 id_var = self._parse_id_var() 2682 if not id_var: 2683 return None 2684 2685 options.append( 2686 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2687 ) 2688 2689 return self.expression(exp.LikeProperty, this=table, expressions=options) 2690 2691 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2692 return self.expression( 2693 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2694 ) 2695 2696 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2697 self._match(TokenType.EQ) 2698 return self.expression( 2699 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2700 ) 2701 2702 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2703 self._match_text_seq("WITH", "CONNECTION") 2704 return self.expression( 2705 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2706 ) 2707 2708 def _parse_returns(self) -> exp.ReturnsProperty: 2709 value: t.Optional[exp.Expression] 2710 null = None 2711 is_table = self._match(TokenType.TABLE) 2712 2713 if is_table: 2714 if self._match(TokenType.LT): 2715 value = self.expression( 2716 exp.Schema, 2717 this="TABLE", 2718 expressions=self._parse_csv(self._parse_struct_types), 2719 ) 2720 if not self._match(TokenType.GT): 2721 self.raise_error("Expecting >") 2722 else: 2723 value = self._parse_schema(exp.var("TABLE")) 2724 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2725 null = True 2726 value = None 2727 else: 2728 value = self._parse_types() 2729 2730 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2731 2732 def _parse_describe(self) -> exp.Describe: 2733 kind = self._match_set(self.CREATABLES) and self._prev.text 2734 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2735 if self._match(TokenType.DOT): 2736 style = None 2737 self._retreat(self._index - 2) 2738 2739 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2740 2741 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2742 this = self._parse_statement() 2743 else: 2744 this = self._parse_table(schema=True) 2745 2746 properties = self._parse_properties() 2747 expressions = properties.expressions if properties else None 2748 partition = self._parse_partition() 2749 return self.expression( 2750 exp.Describe, 2751 this=this, 2752 style=style, 2753 kind=kind, 2754 expressions=expressions, 2755 partition=partition, 2756 format=format, 2757 ) 2758 2759 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2760 kind = self._prev.text.upper() 2761 expressions = [] 2762 2763 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2764 if self._match(TokenType.WHEN): 2765 expression = self._parse_disjunction() 2766 self._match(TokenType.THEN) 2767 else: 2768 expression = None 2769 2770 else_ = self._match(TokenType.ELSE) 2771 2772 if not self._match(TokenType.INTO): 2773 return None 2774 2775 return self.expression( 2776 
exp.ConditionalInsert, 2777 this=self.expression( 2778 exp.Insert, 2779 this=self._parse_table(schema=True), 2780 expression=self._parse_derived_table_values(), 2781 ), 2782 expression=expression, 2783 else_=else_, 2784 ) 2785 2786 expression = parse_conditional_insert() 2787 while expression is not None: 2788 expressions.append(expression) 2789 expression = parse_conditional_insert() 2790 2791 return self.expression( 2792 exp.MultitableInserts, 2793 kind=kind, 2794 comments=comments, 2795 expressions=expressions, 2796 source=self._parse_table(), 2797 ) 2798 2799 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2800 comments = [] 2801 hint = self._parse_hint() 2802 overwrite = self._match(TokenType.OVERWRITE) 2803 ignore = self._match(TokenType.IGNORE) 2804 local = self._match_text_seq("LOCAL") 2805 alternative = None 2806 is_function = None 2807 2808 if self._match_text_seq("DIRECTORY"): 2809 this: t.Optional[exp.Expression] = self.expression( 2810 exp.Directory, 2811 this=self._parse_var_or_string(), 2812 local=local, 2813 row_format=self._parse_row_format(match_row=True), 2814 ) 2815 else: 2816 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2817 comments += ensure_list(self._prev_comments) 2818 return self._parse_multitable_inserts(comments) 2819 2820 if self._match(TokenType.OR): 2821 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2822 2823 self._match(TokenType.INTO) 2824 comments += ensure_list(self._prev_comments) 2825 self._match(TokenType.TABLE) 2826 is_function = self._match(TokenType.FUNCTION) 2827 2828 this = ( 2829 self._parse_table(schema=True, parse_partition=True) 2830 if not is_function 2831 else self._parse_function() 2832 ) 2833 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2834 this.set("alias", self._parse_table_alias()) 2835 2836 returning = self._parse_returning() 2837 2838 return self.expression( 2839 exp.Insert, 2840 comments=comments, 2841 hint=hint, 2842 is_function=is_function, 2843 this=this, 2844 stored=self._match_text_seq("STORED") and self._parse_stored(), 2845 by_name=self._match_text_seq("BY", "NAME"), 2846 exists=self._parse_exists(), 2847 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2848 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2849 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2850 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2851 conflict=self._parse_on_conflict(), 2852 returning=returning or self._parse_returning(), 2853 overwrite=overwrite, 2854 alternative=alternative, 2855 ignore=ignore, 2856 source=self._match(TokenType.TABLE) and self._parse_table(), 2857 ) 2858 2859 def _parse_kill(self) -> exp.Kill: 2860 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2861 2862 return self.expression( 2863 exp.Kill, 2864 this=self._parse_primary(), 2865 kind=kind, 2866 ) 2867 2868 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2869 conflict = self._match_text_seq("ON", "CONFLICT") 2870 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2871 2872 if not conflict and not duplicate: 2873 return None 2874 2875 conflict_keys = None 2876 constraint = None 2877 2878 if conflict: 2879 if self._match_text_seq("ON", "CONSTRAINT"): 2880 constraint = self._parse_id_var() 2881 elif self._match(TokenType.L_PAREN): 2882 conflict_keys = self._parse_csv(self._parse_id_var) 2883 
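        # A minimal usage sketch for the INSERT / ON CONFLICT path handled by _parse_insert
        # and _parse_on_conflict, assuming sqlglot's public parse_one API; the "conflict"
        # arg name mirrors the keyword used when building exp.Insert above.
        #
        #   >>> import sqlglot
        #   >>> from sqlglot import exp
        #   >>> stmt = sqlglot.parse_one(
        #   ...     "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO NOTHING", read="postgres"
        #   ... )
        #   >>> isinstance(stmt, exp.Insert), isinstance(stmt.args.get("conflict"), exp.OnConflict)
        #   (True, True)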
self._match_r_paren() 2884 2885 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2886 if self._prev.token_type == TokenType.UPDATE: 2887 self._match(TokenType.SET) 2888 expressions = self._parse_csv(self._parse_equality) 2889 else: 2890 expressions = None 2891 2892 return self.expression( 2893 exp.OnConflict, 2894 duplicate=duplicate, 2895 expressions=expressions, 2896 action=action, 2897 conflict_keys=conflict_keys, 2898 constraint=constraint, 2899 where=self._parse_where(), 2900 ) 2901 2902 def _parse_returning(self) -> t.Optional[exp.Returning]: 2903 if not self._match(TokenType.RETURNING): 2904 return None 2905 return self.expression( 2906 exp.Returning, 2907 expressions=self._parse_csv(self._parse_expression), 2908 into=self._match(TokenType.INTO) and self._parse_table_part(), 2909 ) 2910 2911 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2912 if not self._match(TokenType.FORMAT): 2913 return None 2914 return self._parse_row_format() 2915 2916 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2917 index = self._index 2918 with_ = with_ or self._match_text_seq("WITH") 2919 2920 if not self._match(TokenType.SERDE_PROPERTIES): 2921 self._retreat(index) 2922 return None 2923 return self.expression( 2924 exp.SerdeProperties, 2925 **{ # type: ignore 2926 "expressions": self._parse_wrapped_properties(), 2927 "with": with_, 2928 }, 2929 ) 2930 2931 def _parse_row_format( 2932 self, match_row: bool = False 2933 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2934 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2935 return None 2936 2937 if self._match_text_seq("SERDE"): 2938 this = self._parse_string() 2939 2940 serde_properties = self._parse_serde_properties() 2941 2942 return self.expression( 2943 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2944 ) 2945 2946 self._match_text_seq("DELIMITED") 2947 2948 kwargs = {} 2949 2950 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2951 kwargs["fields"] = self._parse_string() 2952 if self._match_text_seq("ESCAPED", "BY"): 2953 kwargs["escaped"] = self._parse_string() 2954 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2955 kwargs["collection_items"] = self._parse_string() 2956 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2957 kwargs["map_keys"] = self._parse_string() 2958 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2959 kwargs["lines"] = self._parse_string() 2960 if self._match_text_seq("NULL", "DEFINED", "AS"): 2961 kwargs["null"] = self._parse_string() 2962 2963 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2964 2965 def _parse_load(self) -> exp.LoadData | exp.Command: 2966 if self._match_text_seq("DATA"): 2967 local = self._match_text_seq("LOCAL") 2968 self._match_text_seq("INPATH") 2969 inpath = self._parse_string() 2970 overwrite = self._match(TokenType.OVERWRITE) 2971 self._match_pair(TokenType.INTO, TokenType.TABLE) 2972 2973 return self.expression( 2974 exp.LoadData, 2975 this=self._parse_table(schema=True), 2976 local=local, 2977 overwrite=overwrite, 2978 inpath=inpath, 2979 partition=self._parse_partition(), 2980 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2981 serde=self._match_text_seq("SERDE") and self._parse_string(), 2982 ) 2983 return self._parse_as_command(self._prev) 2984 2985 def _parse_delete(self) -> exp.Delete: 2986 # This handles 
MySQL's "Multiple-Table Syntax" 2987 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2988 tables = None 2989 if not self._match(TokenType.FROM, advance=False): 2990 tables = self._parse_csv(self._parse_table) or None 2991 2992 returning = self._parse_returning() 2993 2994 return self.expression( 2995 exp.Delete, 2996 tables=tables, 2997 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2998 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2999 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3000 where=self._parse_where(), 3001 returning=returning or self._parse_returning(), 3002 limit=self._parse_limit(), 3003 ) 3004 3005 def _parse_update(self) -> exp.Update: 3006 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3007 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3008 returning = self._parse_returning() 3009 return self.expression( 3010 exp.Update, 3011 **{ # type: ignore 3012 "this": this, 3013 "expressions": expressions, 3014 "from": self._parse_from(joins=True), 3015 "where": self._parse_where(), 3016 "returning": returning or self._parse_returning(), 3017 "order": self._parse_order(), 3018 "limit": self._parse_limit(), 3019 }, 3020 ) 3021 3022 def _parse_use(self) -> exp.Use: 3023 return self.expression( 3024 exp.Use, 3025 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3026 this=self._parse_table(schema=False), 3027 ) 3028 3029 def _parse_uncache(self) -> exp.Uncache: 3030 if not self._match(TokenType.TABLE): 3031 self.raise_error("Expecting TABLE after UNCACHE") 3032 3033 return self.expression( 3034 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3035 ) 3036 3037 def _parse_cache(self) -> exp.Cache: 3038 lazy = self._match_text_seq("LAZY") 3039 self._match(TokenType.TABLE) 3040 table = self._parse_table(schema=True) 3041 3042 options = [] 3043 if self._match_text_seq("OPTIONS"): 3044 self._match_l_paren() 3045 k = self._parse_string() 3046 self._match(TokenType.EQ) 3047 v = self._parse_string() 3048 options = [k, v] 3049 self._match_r_paren() 3050 3051 self._match(TokenType.ALIAS) 3052 return self.expression( 3053 exp.Cache, 3054 this=table, 3055 lazy=lazy, 3056 options=options, 3057 expression=self._parse_select(nested=True), 3058 ) 3059 3060 def _parse_partition(self) -> t.Optional[exp.Partition]: 3061 if not self._match_texts(self.PARTITION_KEYWORDS): 3062 return None 3063 3064 return self.expression( 3065 exp.Partition, 3066 subpartition=self._prev.text.upper() == "SUBPARTITION", 3067 expressions=self._parse_wrapped_csv(self._parse_assignment), 3068 ) 3069 3070 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3071 def _parse_value_expression() -> t.Optional[exp.Expression]: 3072 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3073 return exp.var(self._prev.text.upper()) 3074 return self._parse_expression() 3075 3076 if self._match(TokenType.L_PAREN): 3077 expressions = self._parse_csv(_parse_value_expression) 3078 self._match_r_paren() 3079 return self.expression(exp.Tuple, expressions=expressions) 3080 3081 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
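        # A small sketch of the row-tuple shape produced by _parse_value, assuming the
        # public parse_one API; each parenthesized row becomes an exp.Tuple inside exp.Values.
        #
        #   >>> import sqlglot
        #   >>> from sqlglot import exp
        #   >>> sql = "SELECT * FROM (VALUES (1, 'a'), (2, 'b')) AS v(id, name)"
        #   >>> values = sqlglot.parse_one(sql).find(exp.Values)
        #   >>> [isinstance(row, exp.Tuple) for row in values.expressions]
        #   [True, True]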
3082 expression = self._parse_expression() 3083 if expression: 3084 return self.expression(exp.Tuple, expressions=[expression]) 3085 return None 3086 3087 def _parse_projections(self) -> t.List[exp.Expression]: 3088 return self._parse_expressions() 3089 3090 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3091 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3092 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3093 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3094 ) 3095 elif self._match(TokenType.FROM): 3096 from_ = self._parse_from(skip_from_token=True) 3097 # Support parentheses for duckdb FROM-first syntax 3098 select = self._parse_select() 3099 if select: 3100 select.set("from", from_) 3101 this = select 3102 else: 3103 this = exp.select("*").from_(t.cast(exp.From, from_)) 3104 else: 3105 this = ( 3106 self._parse_table() 3107 if table 3108 else self._parse_select(nested=True, parse_set_operation=False) 3109 ) 3110 3111 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3112 # in case a modifier (e.g. join) is following 3113 if table and isinstance(this, exp.Values) and this.alias: 3114 alias = this.args["alias"].pop() 3115 this = exp.Table(this=this, alias=alias) 3116 3117 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3118 3119 return this 3120 3121 def _parse_select( 3122 self, 3123 nested: bool = False, 3124 table: bool = False, 3125 parse_subquery_alias: bool = True, 3126 parse_set_operation: bool = True, 3127 ) -> t.Optional[exp.Expression]: 3128 cte = self._parse_with() 3129 3130 if cte: 3131 this = self._parse_statement() 3132 3133 if not this: 3134 self.raise_error("Failed to parse any statement following CTE") 3135 return cte 3136 3137 if "with" in this.arg_types: 3138 this.set("with", cte) 3139 else: 3140 self.raise_error(f"{this.key} does not support CTE") 3141 this = cte 3142 3143 return this 3144 3145 # duckdb supports leading with FROM x 3146 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3147 3148 if self._match(TokenType.SELECT): 3149 comments = self._prev_comments 3150 3151 hint = self._parse_hint() 3152 3153 if self._next and not self._next.token_type == TokenType.DOT: 3154 all_ = self._match(TokenType.ALL) 3155 distinct = self._match_set(self.DISTINCT_TOKENS) 3156 else: 3157 all_, distinct = None, None 3158 3159 kind = ( 3160 self._match(TokenType.ALIAS) 3161 and self._match_texts(("STRUCT", "VALUE")) 3162 and self._prev.text.upper() 3163 ) 3164 3165 if distinct: 3166 distinct = self.expression( 3167 exp.Distinct, 3168 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3169 ) 3170 3171 if all_ and distinct: 3172 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3173 3174 operation_modifiers = [] 3175 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3176 operation_modifiers.append(exp.var(self._prev.text.upper())) 3177 3178 limit = self._parse_limit(top=True) 3179 projections = self._parse_projections() 3180 3181 this = self.expression( 3182 exp.Select, 3183 kind=kind, 3184 hint=hint, 3185 distinct=distinct, 3186 expressions=projections, 3187 limit=limit, 3188 operation_modifiers=operation_modifiers or None, 3189 ) 3190 this.comments = comments 3191 3192 into = self._parse_into() 3193 if into: 3194 this.set("into", into) 3195 3196 if not from_: 3197 from_ = self._parse_from() 3198 3199 if from_: 3200 this.set("from", from_) 3201 3202 this = 
self._parse_query_modifiers(this) 3203 elif (table or nested) and self._match(TokenType.L_PAREN): 3204 this = self._parse_wrapped_select(table=table) 3205 3206 # We return early here so that the UNION isn't attached to the subquery by the 3207 # following call to _parse_set_operations, but instead becomes the parent node 3208 self._match_r_paren() 3209 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3210 elif self._match(TokenType.VALUES, advance=False): 3211 this = self._parse_derived_table_values() 3212 elif from_: 3213 this = exp.select("*").from_(from_.this, copy=False) 3214 elif self._match(TokenType.SUMMARIZE): 3215 table = self._match(TokenType.TABLE) 3216 this = self._parse_select() or self._parse_string() or self._parse_table() 3217 return self.expression(exp.Summarize, this=this, table=table) 3218 elif self._match(TokenType.DESCRIBE): 3219 this = self._parse_describe() 3220 elif self._match_text_seq("STREAM"): 3221 this = self._parse_function() 3222 if this: 3223 this = self.expression(exp.Stream, this=this) 3224 else: 3225 self._retreat(self._index - 1) 3226 else: 3227 this = None 3228 3229 return self._parse_set_operations(this) if parse_set_operation else this 3230 3231 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3232 self._match_text_seq("SEARCH") 3233 3234 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3235 3236 if not kind: 3237 return None 3238 3239 self._match_text_seq("FIRST", "BY") 3240 3241 return self.expression( 3242 exp.RecursiveWithSearch, 3243 kind=kind, 3244 this=self._parse_id_var(), 3245 expression=self._match_text_seq("SET") and self._parse_id_var(), 3246 using=self._match_text_seq("USING") and self._parse_id_var(), 3247 ) 3248 3249 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3250 if not skip_with_token and not self._match(TokenType.WITH): 3251 return None 3252 3253 comments = self._prev_comments 3254 recursive = self._match(TokenType.RECURSIVE) 3255 3256 last_comments = None 3257 expressions = [] 3258 while True: 3259 cte = self._parse_cte() 3260 if isinstance(cte, exp.CTE): 3261 expressions.append(cte) 3262 if last_comments: 3263 cte.add_comments(last_comments) 3264 3265 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3266 break 3267 else: 3268 self._match(TokenType.WITH) 3269 3270 last_comments = self._prev_comments 3271 3272 return self.expression( 3273 exp.With, 3274 comments=comments, 3275 expressions=expressions, 3276 recursive=recursive, 3277 search=self._parse_recursive_with_search(), 3278 ) 3279 3280 def _parse_cte(self) -> t.Optional[exp.CTE]: 3281 index = self._index 3282 3283 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3284 if not alias or not alias.this: 3285 self.raise_error("Expected CTE to have alias") 3286 3287 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3288 self._retreat(index) 3289 return None 3290 3291 comments = self._prev_comments 3292 3293 if self._match_text_seq("NOT", "MATERIALIZED"): 3294 materialized = False 3295 elif self._match_text_seq("MATERIALIZED"): 3296 materialized = True 3297 else: 3298 materialized = None 3299 3300 cte = self.expression( 3301 exp.CTE, 3302 this=self._parse_wrapped(self._parse_statement), 3303 alias=alias, 3304 materialized=materialized, 3305 comments=comments, 3306 ) 3307 3308 if isinstance(cte.this, exp.Values): 3309 cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3310 3311 return 
cte 3312 3313 def _parse_table_alias( 3314 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3315 ) -> t.Optional[exp.TableAlias]: 3316 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3317 # so this section tries to parse the clause version and if it fails, it treats the token 3318 # as an identifier (alias) 3319 if self._can_parse_limit_or_offset(): 3320 return None 3321 3322 any_token = self._match(TokenType.ALIAS) 3323 alias = ( 3324 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3325 or self._parse_string_as_identifier() 3326 ) 3327 3328 index = self._index 3329 if self._match(TokenType.L_PAREN): 3330 columns = self._parse_csv(self._parse_function_parameter) 3331 self._match_r_paren() if columns else self._retreat(index) 3332 else: 3333 columns = None 3334 3335 if not alias and not columns: 3336 return None 3337 3338 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3339 3340 # We bubble up comments from the Identifier to the TableAlias 3341 if isinstance(alias, exp.Identifier): 3342 table_alias.add_comments(alias.pop_comments()) 3343 3344 return table_alias 3345 3346 def _parse_subquery( 3347 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3348 ) -> t.Optional[exp.Subquery]: 3349 if not this: 3350 return None 3351 3352 return self.expression( 3353 exp.Subquery, 3354 this=this, 3355 pivots=self._parse_pivots(), 3356 alias=self._parse_table_alias() if parse_alias else None, 3357 sample=self._parse_table_sample(), 3358 ) 3359 3360 def _implicit_unnests_to_explicit(self, this: E) -> E: 3361 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3362 3363 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3364 for i, join in enumerate(this.args.get("joins") or []): 3365 table = join.this 3366 normalized_table = table.copy() 3367 normalized_table.meta["maybe_column"] = True 3368 normalized_table = _norm(normalized_table, dialect=self.dialect) 3369 3370 if isinstance(table, exp.Table) and not join.args.get("on"): 3371 if normalized_table.parts[0].name in refs: 3372 table_as_column = table.to_column() 3373 unnest = exp.Unnest(expressions=[table_as_column]) 3374 3375 # Table.to_column creates a parent Alias node that we want to convert to 3376 # a TableAlias and attach to the Unnest, so it matches the parser's output 3377 if isinstance(table.args.get("alias"), exp.TableAlias): 3378 table_as_column.replace(table_as_column.this) 3379 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3380 3381 table.replace(unnest) 3382 3383 refs.add(normalized_table.alias_or_name) 3384 3385 return this 3386 3387 def _parse_query_modifiers( 3388 self, this: t.Optional[exp.Expression] 3389 ) -> t.Optional[exp.Expression]: 3390 if isinstance(this, self.MODIFIABLES): 3391 for join in self._parse_joins(): 3392 this.append("joins", join) 3393 for lateral in iter(self._parse_lateral, None): 3394 this.append("laterals", lateral) 3395 3396 while True: 3397 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3398 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3399 key, expression = parser(self) 3400 3401 if expression: 3402 this.set(key, expression) 3403 if key == "limit": 3404 offset = expression.args.pop("offset", None) 3405 3406 if offset: 3407 offset = exp.Offset(expression=offset) 3408 this.set("offset", offset) 3409 3410 limit_by_expressions = expression.expressions 3411 
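        # A brief sketch of the limit/offset normalization above, assuming MySQL's
        # "LIMIT <offset>, <count>" form; the offset embedded in the parsed exp.Limit is
        # expected to be popped out and attached to the query as a separate modifier.
        #
        #   >>> import sqlglot
        #   >>> q = sqlglot.parse_one("SELECT x FROM t LIMIT 5, 10", read="mysql")
        #   >>> sorted(k for k in ("limit", "offset") if q.args.get(k))
        #   ['limit', 'offset']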
expression.set("expressions", None) 3412 offset.set("expressions", limit_by_expressions) 3413 continue 3414 break 3415 3416 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3417 this = self._implicit_unnests_to_explicit(this) 3418 3419 return this 3420 3421 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3422 start = self._curr 3423 while self._curr: 3424 self._advance() 3425 3426 end = self._tokens[self._index - 1] 3427 return exp.Hint(expressions=[self._find_sql(start, end)]) 3428 3429 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3430 return self._parse_function_call() 3431 3432 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3433 start_index = self._index 3434 should_fallback_to_string = False 3435 3436 hints = [] 3437 try: 3438 for hint in iter( 3439 lambda: self._parse_csv( 3440 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3441 ), 3442 [], 3443 ): 3444 hints.extend(hint) 3445 except ParseError: 3446 should_fallback_to_string = True 3447 3448 if should_fallback_to_string or self._curr: 3449 self._retreat(start_index) 3450 return self._parse_hint_fallback_to_string() 3451 3452 return self.expression(exp.Hint, expressions=hints) 3453 3454 def _parse_hint(self) -> t.Optional[exp.Hint]: 3455 if self._match(TokenType.HINT) and self._prev_comments: 3456 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3457 3458 return None 3459 3460 def _parse_into(self) -> t.Optional[exp.Into]: 3461 if not self._match(TokenType.INTO): 3462 return None 3463 3464 temp = self._match(TokenType.TEMPORARY) 3465 unlogged = self._match_text_seq("UNLOGGED") 3466 self._match(TokenType.TABLE) 3467 3468 return self.expression( 3469 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3470 ) 3471 3472 def _parse_from( 3473 self, joins: bool = False, skip_from_token: bool = False 3474 ) -> t.Optional[exp.From]: 3475 if not skip_from_token and not self._match(TokenType.FROM): 3476 return None 3477 3478 return self.expression( 3479 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3480 ) 3481 3482 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3483 return self.expression( 3484 exp.MatchRecognizeMeasure, 3485 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3486 this=self._parse_expression(), 3487 ) 3488 3489 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3490 if not self._match(TokenType.MATCH_RECOGNIZE): 3491 return None 3492 3493 self._match_l_paren() 3494 3495 partition = self._parse_partition_by() 3496 order = self._parse_order() 3497 3498 measures = ( 3499 self._parse_csv(self._parse_match_recognize_measure) 3500 if self._match_text_seq("MEASURES") 3501 else None 3502 ) 3503 3504 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3505 rows = exp.var("ONE ROW PER MATCH") 3506 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3507 text = "ALL ROWS PER MATCH" 3508 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3509 text += " SHOW EMPTY MATCHES" 3510 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3511 text += " OMIT EMPTY MATCHES" 3512 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3513 text += " WITH UNMATCHED ROWS" 3514 rows = exp.var(text) 3515 else: 3516 rows = None 3517 3518 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3519 text = "AFTER MATCH SKIP" 3520 if self._match_text_seq("PAST", "LAST", "ROW"): 3521 text += " PAST 
LAST ROW" 3522 elif self._match_text_seq("TO", "NEXT", "ROW"): 3523 text += " TO NEXT ROW" 3524 elif self._match_text_seq("TO", "FIRST"): 3525 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3526 elif self._match_text_seq("TO", "LAST"): 3527 text += f" TO LAST {self._advance_any().text}" # type: ignore 3528 after = exp.var(text) 3529 else: 3530 after = None 3531 3532 if self._match_text_seq("PATTERN"): 3533 self._match_l_paren() 3534 3535 if not self._curr: 3536 self.raise_error("Expecting )", self._curr) 3537 3538 paren = 1 3539 start = self._curr 3540 3541 while self._curr and paren > 0: 3542 if self._curr.token_type == TokenType.L_PAREN: 3543 paren += 1 3544 if self._curr.token_type == TokenType.R_PAREN: 3545 paren -= 1 3546 3547 end = self._prev 3548 self._advance() 3549 3550 if paren > 0: 3551 self.raise_error("Expecting )", self._curr) 3552 3553 pattern = exp.var(self._find_sql(start, end)) 3554 else: 3555 pattern = None 3556 3557 define = ( 3558 self._parse_csv(self._parse_name_as_expression) 3559 if self._match_text_seq("DEFINE") 3560 else None 3561 ) 3562 3563 self._match_r_paren() 3564 3565 return self.expression( 3566 exp.MatchRecognize, 3567 partition_by=partition, 3568 order=order, 3569 measures=measures, 3570 rows=rows, 3571 after=after, 3572 pattern=pattern, 3573 define=define, 3574 alias=self._parse_table_alias(), 3575 ) 3576 3577 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3578 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3579 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3580 cross_apply = False 3581 3582 if cross_apply is not None: 3583 this = self._parse_select(table=True) 3584 view = None 3585 outer = None 3586 elif self._match(TokenType.LATERAL): 3587 this = self._parse_select(table=True) 3588 view = self._match(TokenType.VIEW) 3589 outer = self._match(TokenType.OUTER) 3590 else: 3591 return None 3592 3593 if not this: 3594 this = ( 3595 self._parse_unnest() 3596 or self._parse_function() 3597 or self._parse_id_var(any_token=False) 3598 ) 3599 3600 while self._match(TokenType.DOT): 3601 this = exp.Dot( 3602 this=this, 3603 expression=self._parse_function() or self._parse_id_var(any_token=False), 3604 ) 3605 3606 ordinality: t.Optional[bool] = None 3607 3608 if view: 3609 table = self._parse_id_var(any_token=False) 3610 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3611 table_alias: t.Optional[exp.TableAlias] = self.expression( 3612 exp.TableAlias, this=table, columns=columns 3613 ) 3614 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3615 # We move the alias from the lateral's child node to the lateral itself 3616 table_alias = this.args["alias"].pop() 3617 else: 3618 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3619 table_alias = self._parse_table_alias() 3620 3621 return self.expression( 3622 exp.Lateral, 3623 this=this, 3624 view=view, 3625 outer=outer, 3626 alias=table_alias, 3627 cross_apply=cross_apply, 3628 ordinality=ordinality, 3629 ) 3630 3631 def _parse_join_parts( 3632 self, 3633 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3634 return ( 3635 self._match_set(self.JOIN_METHODS) and self._prev, 3636 self._match_set(self.JOIN_SIDES) and self._prev, 3637 self._match_set(self.JOIN_KINDS) and self._prev, 3638 ) 3639 3640 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3641 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3642 this = self._parse_column() 
3643 if isinstance(this, exp.Column): 3644 return this.this 3645 return this 3646 3647 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3648 3649 def _parse_join( 3650 self, skip_join_token: bool = False, parse_bracket: bool = False 3651 ) -> t.Optional[exp.Join]: 3652 if self._match(TokenType.COMMA): 3653 table = self._try_parse(self._parse_table) 3654 if table: 3655 return self.expression(exp.Join, this=table) 3656 return None 3657 3658 index = self._index 3659 method, side, kind = self._parse_join_parts() 3660 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3661 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3662 3663 if not skip_join_token and not join: 3664 self._retreat(index) 3665 kind = None 3666 method = None 3667 side = None 3668 3669 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3670 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3671 3672 if not skip_join_token and not join and not outer_apply and not cross_apply: 3673 return None 3674 3675 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3676 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3677 kwargs["expressions"] = self._parse_csv( 3678 lambda: self._parse_table(parse_bracket=parse_bracket) 3679 ) 3680 3681 if method: 3682 kwargs["method"] = method.text 3683 if side: 3684 kwargs["side"] = side.text 3685 if kind: 3686 kwargs["kind"] = kind.text 3687 if hint: 3688 kwargs["hint"] = hint 3689 3690 if self._match(TokenType.MATCH_CONDITION): 3691 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3692 3693 if self._match(TokenType.ON): 3694 kwargs["on"] = self._parse_assignment() 3695 elif self._match(TokenType.USING): 3696 kwargs["using"] = self._parse_using_identifiers() 3697 elif ( 3698 not (outer_apply or cross_apply) 3699 and not isinstance(kwargs["this"], exp.Unnest) 3700 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3701 ): 3702 index = self._index 3703 joins: t.Optional[list] = list(self._parse_joins()) 3704 3705 if joins and self._match(TokenType.ON): 3706 kwargs["on"] = self._parse_assignment() 3707 elif joins and self._match(TokenType.USING): 3708 kwargs["using"] = self._parse_using_identifiers() 3709 else: 3710 joins = None 3711 self._retreat(index) 3712 3713 kwargs["this"].set("joins", joins if joins else None) 3714 3715 comments = [c for token in (method, side, kind) if token for c in token.comments] 3716 return self.expression(exp.Join, comments=comments, **kwargs) 3717 3718 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3719 this = self._parse_assignment() 3720 3721 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3722 return this 3723 3724 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3725 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3726 3727 return this 3728 3729 def _parse_index_params(self) -> exp.IndexParameters: 3730 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3731 3732 if self._match(TokenType.L_PAREN, advance=False): 3733 columns = self._parse_wrapped_csv(self._parse_with_operator) 3734 else: 3735 columns = None 3736 3737 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3738 partition_by = self._parse_partition_by() 3739 with_storage = self._match(TokenType.WITH) and 
self._parse_wrapped_properties() 3740 tablespace = ( 3741 self._parse_var(any_token=True) 3742 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3743 else None 3744 ) 3745 where = self._parse_where() 3746 3747 on = self._parse_field() if self._match(TokenType.ON) else None 3748 3749 return self.expression( 3750 exp.IndexParameters, 3751 using=using, 3752 columns=columns, 3753 include=include, 3754 partition_by=partition_by, 3755 where=where, 3756 with_storage=with_storage, 3757 tablespace=tablespace, 3758 on=on, 3759 ) 3760 3761 def _parse_index( 3762 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3763 ) -> t.Optional[exp.Index]: 3764 if index or anonymous: 3765 unique = None 3766 primary = None 3767 amp = None 3768 3769 self._match(TokenType.ON) 3770 self._match(TokenType.TABLE) # hive 3771 table = self._parse_table_parts(schema=True) 3772 else: 3773 unique = self._match(TokenType.UNIQUE) 3774 primary = self._match_text_seq("PRIMARY") 3775 amp = self._match_text_seq("AMP") 3776 3777 if not self._match(TokenType.INDEX): 3778 return None 3779 3780 index = self._parse_id_var() 3781 table = None 3782 3783 params = self._parse_index_params() 3784 3785 return self.expression( 3786 exp.Index, 3787 this=index, 3788 table=table, 3789 unique=unique, 3790 primary=primary, 3791 amp=amp, 3792 params=params, 3793 ) 3794 3795 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3796 hints: t.List[exp.Expression] = [] 3797 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3798 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3799 hints.append( 3800 self.expression( 3801 exp.WithTableHint, 3802 expressions=self._parse_csv( 3803 lambda: self._parse_function() or self._parse_var(any_token=True) 3804 ), 3805 ) 3806 ) 3807 self._match_r_paren() 3808 else: 3809 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3810 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3811 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3812 3813 self._match_set((TokenType.INDEX, TokenType.KEY)) 3814 if self._match(TokenType.FOR): 3815 hint.set("target", self._advance_any() and self._prev.text.upper()) 3816 3817 hint.set("expressions", self._parse_wrapped_id_vars()) 3818 hints.append(hint) 3819 3820 return hints or None 3821 3822 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3823 return ( 3824 (not schema and self._parse_function(optional_parens=False)) 3825 or self._parse_id_var(any_token=False) 3826 or self._parse_string_as_identifier() 3827 or self._parse_placeholder() 3828 ) 3829 3830 def _parse_table_parts( 3831 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3832 ) -> exp.Table: 3833 catalog = None 3834 db = None 3835 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3836 3837 while self._match(TokenType.DOT): 3838 if catalog: 3839 # This allows nesting the table in arbitrarily many dot expressions if needed 3840 table = self.expression( 3841 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3842 ) 3843 else: 3844 catalog = db 3845 db = table 3846 # "" used for tsql FROM a..b case 3847 table = self._parse_table_part(schema=schema) or "" 3848 3849 if ( 3850 wildcard 3851 and self._is_connected() 3852 and (isinstance(table, exp.Identifier) or not table) 3853 and self._match(TokenType.STAR) 3854 ): 3855 if isinstance(table, exp.Identifier): 3856 table.args["this"] += "*" 3857 else: 3858 
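        # A short sketch of the dotted-name decomposition done by _parse_table_parts,
        # assuming the public parse_one API; catalog, db and name are convenience
        # properties on exp.Table that read back the parts collected here.
        #
        #   >>> import sqlglot
        #   >>> from sqlglot import exp
        #   >>> tbl = sqlglot.parse_one("SELECT * FROM prod.analytics.events").find(exp.Table)
        #   >>> tbl.catalog, tbl.db, tbl.name
        #   ('prod', 'analytics', 'events')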
table = exp.Identifier(this="*") 3859 3860 # We bubble up comments from the Identifier to the Table 3861 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3862 3863 if is_db_reference: 3864 catalog = db 3865 db = table 3866 table = None 3867 3868 if not table and not is_db_reference: 3869 self.raise_error(f"Expected table name but got {self._curr}") 3870 if not db and is_db_reference: 3871 self.raise_error(f"Expected database name but got {self._curr}") 3872 3873 table = self.expression( 3874 exp.Table, 3875 comments=comments, 3876 this=table, 3877 db=db, 3878 catalog=catalog, 3879 ) 3880 3881 changes = self._parse_changes() 3882 if changes: 3883 table.set("changes", changes) 3884 3885 at_before = self._parse_historical_data() 3886 if at_before: 3887 table.set("when", at_before) 3888 3889 pivots = self._parse_pivots() 3890 if pivots: 3891 table.set("pivots", pivots) 3892 3893 return table 3894 3895 def _parse_table( 3896 self, 3897 schema: bool = False, 3898 joins: bool = False, 3899 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3900 parse_bracket: bool = False, 3901 is_db_reference: bool = False, 3902 parse_partition: bool = False, 3903 ) -> t.Optional[exp.Expression]: 3904 lateral = self._parse_lateral() 3905 if lateral: 3906 return lateral 3907 3908 unnest = self._parse_unnest() 3909 if unnest: 3910 return unnest 3911 3912 values = self._parse_derived_table_values() 3913 if values: 3914 return values 3915 3916 subquery = self._parse_select(table=True) 3917 if subquery: 3918 if not subquery.args.get("pivots"): 3919 subquery.set("pivots", self._parse_pivots()) 3920 return subquery 3921 3922 bracket = parse_bracket and self._parse_bracket(None) 3923 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3924 3925 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3926 self._parse_table 3927 ) 3928 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3929 3930 only = self._match(TokenType.ONLY) 3931 3932 this = t.cast( 3933 exp.Expression, 3934 bracket 3935 or rows_from 3936 or self._parse_bracket( 3937 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3938 ), 3939 ) 3940 3941 if only: 3942 this.set("only", only) 3943 3944 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3945 self._match_text_seq("*") 3946 3947 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3948 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3949 this.set("partition", self._parse_partition()) 3950 3951 if schema: 3952 return self._parse_schema(this=this) 3953 3954 version = self._parse_version() 3955 3956 if version: 3957 this.set("version", version) 3958 3959 if self.dialect.ALIAS_POST_TABLESAMPLE: 3960 this.set("sample", self._parse_table_sample()) 3961 3962 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3963 if alias: 3964 this.set("alias", alias) 3965 3966 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3967 return self.expression( 3968 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3969 ) 3970 3971 this.set("hints", self._parse_table_hints()) 3972 3973 if not this.args.get("pivots"): 3974 this.set("pivots", self._parse_pivots()) 3975 3976 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3977 this.set("sample", self._parse_table_sample()) 3978 3979 if joins: 3980 for join in self._parse_joins(): 3981 
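        # A compact sketch of how joins collected by _parse_joins end up on the parsed
        # statement, assuming the public parse_one API; for a SELECT the joins land in
        # the "joins" arg rather than being nested under the source table.
        #
        #   >>> import sqlglot
        #   >>> q = sqlglot.parse_one("SELECT * FROM a JOIN b ON a.id = b.id LEFT JOIN c USING (k)")
        #   >>> len(q.args["joins"])
        #   2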
this.append("joins", join) 3982 3983 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3984 this.set("ordinality", True) 3985 this.set("alias", self._parse_table_alias()) 3986 3987 return this 3988 3989 def _parse_version(self) -> t.Optional[exp.Version]: 3990 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3991 this = "TIMESTAMP" 3992 elif self._match(TokenType.VERSION_SNAPSHOT): 3993 this = "VERSION" 3994 else: 3995 return None 3996 3997 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3998 kind = self._prev.text.upper() 3999 start = self._parse_bitwise() 4000 self._match_texts(("TO", "AND")) 4001 end = self._parse_bitwise() 4002 expression: t.Optional[exp.Expression] = self.expression( 4003 exp.Tuple, expressions=[start, end] 4004 ) 4005 elif self._match_text_seq("CONTAINED", "IN"): 4006 kind = "CONTAINED IN" 4007 expression = self.expression( 4008 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4009 ) 4010 elif self._match(TokenType.ALL): 4011 kind = "ALL" 4012 expression = None 4013 else: 4014 self._match_text_seq("AS", "OF") 4015 kind = "AS OF" 4016 expression = self._parse_type() 4017 4018 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4019 4020 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4021 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4022 index = self._index 4023 historical_data = None 4024 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4025 this = self._prev.text.upper() 4026 kind = ( 4027 self._match(TokenType.L_PAREN) 4028 and self._match_texts(self.HISTORICAL_DATA_KIND) 4029 and self._prev.text.upper() 4030 ) 4031 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4032 4033 if expression: 4034 self._match_r_paren() 4035 historical_data = self.expression( 4036 exp.HistoricalData, this=this, kind=kind, expression=expression 4037 ) 4038 else: 4039 self._retreat(index) 4040 4041 return historical_data 4042 4043 def _parse_changes(self) -> t.Optional[exp.Changes]: 4044 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4045 return None 4046 4047 information = self._parse_var(any_token=True) 4048 self._match_r_paren() 4049 4050 return self.expression( 4051 exp.Changes, 4052 information=information, 4053 at_before=self._parse_historical_data(), 4054 end=self._parse_historical_data(), 4055 ) 4056 4057 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4058 if not self._match(TokenType.UNNEST): 4059 return None 4060 4061 expressions = self._parse_wrapped_csv(self._parse_equality) 4062 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4063 4064 alias = self._parse_table_alias() if with_alias else None 4065 4066 if alias: 4067 if self.dialect.UNNEST_COLUMN_ONLY: 4068 if alias.args.get("columns"): 4069 self.raise_error("Unexpected extra column alias in unnest.") 4070 4071 alias.set("columns", [alias.this]) 4072 alias.set("this", None) 4073 4074 columns = alias.args.get("columns") or [] 4075 if offset and len(expressions) < len(columns): 4076 offset = columns.pop() 4077 4078 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4079 self._match(TokenType.ALIAS) 4080 offset = self._parse_id_var( 4081 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4082 ) or exp.to_identifier("offset") 4083 4084 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4085 4086 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4087 is_derived = 
self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4088 if not is_derived and not ( 4089 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4090 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4091 ): 4092 return None 4093 4094 expressions = self._parse_csv(self._parse_value) 4095 alias = self._parse_table_alias() 4096 4097 if is_derived: 4098 self._match_r_paren() 4099 4100 return self.expression( 4101 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4102 ) 4103 4104 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4105 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4106 as_modifier and self._match_text_seq("USING", "SAMPLE") 4107 ): 4108 return None 4109 4110 bucket_numerator = None 4111 bucket_denominator = None 4112 bucket_field = None 4113 percent = None 4114 size = None 4115 seed = None 4116 4117 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4118 matched_l_paren = self._match(TokenType.L_PAREN) 4119 4120 if self.TABLESAMPLE_CSV: 4121 num = None 4122 expressions = self._parse_csv(self._parse_primary) 4123 else: 4124 expressions = None 4125 num = ( 4126 self._parse_factor() 4127 if self._match(TokenType.NUMBER, advance=False) 4128 else self._parse_primary() or self._parse_placeholder() 4129 ) 4130 4131 if self._match_text_seq("BUCKET"): 4132 bucket_numerator = self._parse_number() 4133 self._match_text_seq("OUT", "OF") 4134 bucket_denominator = self._parse_number() 4135 self._match(TokenType.ON) 4136 bucket_field = self._parse_field() 4137 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4138 percent = num 4139 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4140 size = num 4141 else: 4142 percent = num 4143 4144 if matched_l_paren: 4145 self._match_r_paren() 4146 4147 if self._match(TokenType.L_PAREN): 4148 method = self._parse_var(upper=True) 4149 seed = self._match(TokenType.COMMA) and self._parse_number() 4150 self._match_r_paren() 4151 elif self._match_texts(("SEED", "REPEATABLE")): 4152 seed = self._parse_wrapped(self._parse_number) 4153 4154 if not method and self.DEFAULT_SAMPLING_METHOD: 4155 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4156 4157 return self.expression( 4158 exp.TableSample, 4159 expressions=expressions, 4160 method=method, 4161 bucket_numerator=bucket_numerator, 4162 bucket_denominator=bucket_denominator, 4163 bucket_field=bucket_field, 4164 percent=percent, 4165 size=size, 4166 seed=seed, 4167 ) 4168 4169 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4170 return list(iter(self._parse_pivot, None)) or None 4171 4172 def _parse_joins(self) -> t.Iterator[exp.Join]: 4173 return iter(self._parse_join, None) 4174 4175 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4176 if not self._match(TokenType.INTO): 4177 return None 4178 4179 return self.expression( 4180 exp.UnpivotColumns, 4181 this=self._match_text_seq("NAME") and self._parse_column(), 4182 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4183 ) 4184 4185 # https://duckdb.org/docs/sql/statements/pivot 4186 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4187 def _parse_on() -> t.Optional[exp.Expression]: 4188 this = self._parse_bitwise() 4189 4190 if self._match(TokenType.IN): 4191 # PIVOT ...
ON col IN (row_val1, row_val2) 4192 return self._parse_in(this) 4193 if self._match(TokenType.ALIAS, advance=False): 4194 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4195 return self._parse_alias(this) 4196 4197 return this 4198 4199 this = self._parse_table() 4200 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4201 into = self._parse_unpivot_columns() 4202 using = self._match(TokenType.USING) and self._parse_csv( 4203 lambda: self._parse_alias(self._parse_function()) 4204 ) 4205 group = self._parse_group() 4206 4207 return self.expression( 4208 exp.Pivot, 4209 this=this, 4210 expressions=expressions, 4211 using=using, 4212 group=group, 4213 unpivot=is_unpivot, 4214 into=into, 4215 ) 4216 4217 def _parse_pivot_in(self) -> exp.In: 4218 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4219 this = self._parse_select_or_expression() 4220 4221 self._match(TokenType.ALIAS) 4222 alias = self._parse_bitwise() 4223 if alias: 4224 if isinstance(alias, exp.Column) and not alias.db: 4225 alias = alias.this 4226 return self.expression(exp.PivotAlias, this=this, alias=alias) 4227 4228 return this 4229 4230 value = self._parse_column() 4231 4232 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4233 self.raise_error("Expecting IN (") 4234 4235 if self._match(TokenType.ANY): 4236 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4237 else: 4238 exprs = self._parse_csv(_parse_aliased_expression) 4239 4240 self._match_r_paren() 4241 return self.expression(exp.In, this=value, expressions=exprs) 4242 4243 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4244 index = self._index 4245 include_nulls = None 4246 4247 if self._match(TokenType.PIVOT): 4248 unpivot = False 4249 elif self._match(TokenType.UNPIVOT): 4250 unpivot = True 4251 4252 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4253 if self._match_text_seq("INCLUDE", "NULLS"): 4254 include_nulls = True 4255 elif self._match_text_seq("EXCLUDE", "NULLS"): 4256 include_nulls = False 4257 else: 4258 return None 4259 4260 expressions = [] 4261 4262 if not self._match(TokenType.L_PAREN): 4263 self._retreat(index) 4264 return None 4265 4266 if unpivot: 4267 expressions = self._parse_csv(self._parse_column) 4268 else: 4269 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4270 4271 if not expressions: 4272 self.raise_error("Failed to parse PIVOT's aggregation list") 4273 4274 if not self._match(TokenType.FOR): 4275 self.raise_error("Expecting FOR") 4276 4277 fields = [] 4278 while True: 4279 field = self._try_parse(self._parse_pivot_in) 4280 if not field: 4281 break 4282 fields.append(field) 4283 4284 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4285 self._parse_bitwise 4286 ) 4287 4288 group = self._parse_group() 4289 4290 self._match_r_paren() 4291 4292 pivot = self.expression( 4293 exp.Pivot, 4294 expressions=expressions, 4295 fields=fields, 4296 unpivot=unpivot, 4297 include_nulls=include_nulls, 4298 default_on_null=default_on_null, 4299 group=group, 4300 ) 4301 4302 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4303 pivot.set("alias", self._parse_table_alias()) 4304 4305 if not unpivot: 4306 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4307 4308 columns: t.List[exp.Expression] = [] 4309 all_fields = [] 4310 for pivot_field in pivot.fields: 4311 pivot_field_expressions = 
pivot_field.expressions 4312 4313 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4314 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4315 continue 4316 4317 all_fields.append( 4318 [ 4319 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4320 for fld in pivot_field_expressions 4321 ] 4322 ) 4323 4324 if all_fields: 4325 if names: 4326 all_fields.append(names) 4327 4328 # Generate all possible combinations of the pivot columns 4329 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4330 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4331 for fld_parts_tuple in itertools.product(*all_fields): 4332 fld_parts = list(fld_parts_tuple) 4333 4334 if names and self.PREFIXED_PIVOT_COLUMNS: 4335 # Move the "name" to the front of the list 4336 fld_parts.insert(0, fld_parts.pop(-1)) 4337 4338 columns.append(exp.to_identifier("_".join(fld_parts))) 4339 4340 pivot.set("columns", columns) 4341 4342 return pivot 4343 4344 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4345 return [agg.alias for agg in aggregations if agg.alias] 4346 4347 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4348 if not skip_where_token and not self._match(TokenType.PREWHERE): 4349 return None 4350 4351 return self.expression( 4352 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4353 ) 4354 4355 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4356 if not skip_where_token and not self._match(TokenType.WHERE): 4357 return None 4358 4359 return self.expression( 4360 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4361 ) 4362 4363 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4364 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4365 return None 4366 4367 elements: t.Dict[str, t.Any] = defaultdict(list) 4368 4369 if self._match(TokenType.ALL): 4370 elements["all"] = True 4371 elif self._match(TokenType.DISTINCT): 4372 elements["all"] = False 4373 4374 while True: 4375 index = self._index 4376 4377 elements["expressions"].extend( 4378 self._parse_csv( 4379 lambda: None 4380 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4381 else self._parse_assignment() 4382 ) 4383 ) 4384 4385 before_with_index = self._index 4386 with_prefix = self._match(TokenType.WITH) 4387 4388 if self._match(TokenType.ROLLUP): 4389 elements["rollup"].append( 4390 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4391 ) 4392 elif self._match(TokenType.CUBE): 4393 elements["cube"].append( 4394 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4395 ) 4396 elif self._match(TokenType.GROUPING_SETS): 4397 elements["grouping_sets"].append( 4398 self.expression( 4399 exp.GroupingSets, 4400 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4401 ) 4402 ) 4403 elif self._match_text_seq("TOTALS"): 4404 elements["totals"] = True # type: ignore 4405 4406 if before_with_index <= self._index <= before_with_index + 1: 4407 self._retreat(before_with_index) 4408 break 4409 4410 if index == self._index: 4411 break 4412 4413 return self.expression(exp.Group, **elements) # type: ignore 4414 4415 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4416 return self.expression( 4417 kind, expressions=[] if with_prefix else 
self._parse_wrapped_csv(self._parse_column) 4418 ) 4419 4420 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4421 if self._match(TokenType.L_PAREN): 4422 grouping_set = self._parse_csv(self._parse_column) 4423 self._match_r_paren() 4424 return self.expression(exp.Tuple, expressions=grouping_set) 4425 4426 return self._parse_column() 4427 4428 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4429 if not skip_having_token and not self._match(TokenType.HAVING): 4430 return None 4431 return self.expression(exp.Having, this=self._parse_assignment()) 4432 4433 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4434 if not self._match(TokenType.QUALIFY): 4435 return None 4436 return self.expression(exp.Qualify, this=self._parse_assignment()) 4437 4438 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4439 if skip_start_token: 4440 start = None 4441 elif self._match(TokenType.START_WITH): 4442 start = self._parse_assignment() 4443 else: 4444 return None 4445 4446 self._match(TokenType.CONNECT_BY) 4447 nocycle = self._match_text_seq("NOCYCLE") 4448 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4449 exp.Prior, this=self._parse_bitwise() 4450 ) 4451 connect = self._parse_assignment() 4452 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4453 4454 if not start and self._match(TokenType.START_WITH): 4455 start = self._parse_assignment() 4456 4457 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4458 4459 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4460 this = self._parse_id_var(any_token=True) 4461 if self._match(TokenType.ALIAS): 4462 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4463 return this 4464 4465 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4466 if self._match_text_seq("INTERPOLATE"): 4467 return self._parse_wrapped_csv(self._parse_name_as_expression) 4468 return None 4469 4470 def _parse_order( 4471 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4472 ) -> t.Optional[exp.Expression]: 4473 siblings = None 4474 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4475 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4476 return this 4477 4478 siblings = True 4479 4480 return self.expression( 4481 exp.Order, 4482 this=this, 4483 expressions=self._parse_csv(self._parse_ordered), 4484 siblings=siblings, 4485 ) 4486 4487 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4488 if not self._match(token): 4489 return None 4490 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4491 4492 def _parse_ordered( 4493 self, parse_method: t.Optional[t.Callable] = None 4494 ) -> t.Optional[exp.Ordered]: 4495 this = parse_method() if parse_method else self._parse_assignment() 4496 if not this: 4497 return None 4498 4499 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4500 this = exp.var("ALL") 4501 4502 asc = self._match(TokenType.ASC) 4503 desc = self._match(TokenType.DESC) or (asc and False) 4504 4505 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4506 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4507 4508 nulls_first = is_nulls_first or False 4509 explicitly_null_ordered = is_nulls_first or is_nulls_last 4510 4511 if ( 4512 not explicitly_null_ordered 4513 and ( 4514 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4515 or (desc and 
self.dialect.NULL_ORDERING != "nulls_are_small") 4516 ) 4517 and self.dialect.NULL_ORDERING != "nulls_are_last" 4518 ): 4519 nulls_first = True 4520 4521 if self._match_text_seq("WITH", "FILL"): 4522 with_fill = self.expression( 4523 exp.WithFill, 4524 **{ # type: ignore 4525 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4526 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4527 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4528 "interpolate": self._parse_interpolate(), 4529 }, 4530 ) 4531 else: 4532 with_fill = None 4533 4534 return self.expression( 4535 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4536 ) 4537 4538 def _parse_limit_options(self) -> exp.LimitOptions: 4539 percent = self._match(TokenType.PERCENT) 4540 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4541 self._match_text_seq("ONLY") 4542 with_ties = self._match_text_seq("WITH", "TIES") 4543 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4544 4545 def _parse_limit( 4546 self, 4547 this: t.Optional[exp.Expression] = None, 4548 top: bool = False, 4549 skip_limit_token: bool = False, 4550 ) -> t.Optional[exp.Expression]: 4551 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4552 comments = self._prev_comments 4553 if top: 4554 limit_paren = self._match(TokenType.L_PAREN) 4555 expression = self._parse_term() if limit_paren else self._parse_number() 4556 4557 if limit_paren: 4558 self._match_r_paren() 4559 4560 limit_options = self._parse_limit_options() 4561 else: 4562 limit_options = None 4563 expression = self._parse_term() 4564 4565 if self._match(TokenType.COMMA): 4566 offset = expression 4567 expression = self._parse_term() 4568 else: 4569 offset = None 4570 4571 limit_exp = self.expression( 4572 exp.Limit, 4573 this=this, 4574 expression=expression, 4575 offset=offset, 4576 comments=comments, 4577 limit_options=limit_options, 4578 expressions=self._parse_limit_by(), 4579 ) 4580 4581 return limit_exp 4582 4583 if self._match(TokenType.FETCH): 4584 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4585 direction = self._prev.text.upper() if direction else "FIRST" 4586 4587 count = self._parse_field(tokens=self.FETCH_TOKENS) 4588 4589 return self.expression( 4590 exp.Fetch, 4591 direction=direction, 4592 count=count, 4593 limit_options=self._parse_limit_options(), 4594 ) 4595 4596 return this 4597 4598 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4599 if not self._match(TokenType.OFFSET): 4600 return this 4601 4602 count = self._parse_term() 4603 self._match_set((TokenType.ROW, TokenType.ROWS)) 4604 4605 return self.expression( 4606 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4607 ) 4608 4609 def _can_parse_limit_or_offset(self) -> bool: 4610 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4611 return False 4612 4613 index = self._index 4614 result = bool( 4615 self._try_parse(self._parse_limit, retreat=True) 4616 or self._try_parse(self._parse_offset, retreat=True) 4617 ) 4618 self._retreat(index) 4619 return result 4620 4621 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4622 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4623 4624 def _parse_locks(self) -> t.List[exp.Lock]: 4625 locks = [] 4626 while True: 4627 if self._match_text_seq("FOR", "UPDATE"): 4628 update = True 4629 elif 
self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4630 "LOCK", "IN", "SHARE", "MODE" 4631 ): 4632 update = False 4633 else: 4634 break 4635 4636 expressions = None 4637 if self._match_text_seq("OF"): 4638 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4639 4640 wait: t.Optional[bool | exp.Expression] = None 4641 if self._match_text_seq("NOWAIT"): 4642 wait = True 4643 elif self._match_text_seq("WAIT"): 4644 wait = self._parse_primary() 4645 elif self._match_text_seq("SKIP", "LOCKED"): 4646 wait = False 4647 4648 locks.append( 4649 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4650 ) 4651 4652 return locks 4653 4654 def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4655 start = self._index 4656 _, side_token, kind_token = self._parse_join_parts() 4657 4658 side = side_token.text if side_token else None 4659 kind = kind_token.text if kind_token else None 4660 4661 if not self._match_set(self.SET_OPERATIONS): 4662 self._retreat(start) 4663 return None 4664 4665 token_type = self._prev.token_type 4666 4667 if token_type == TokenType.UNION: 4668 operation: t.Type[exp.SetOperation] = exp.Union 4669 elif token_type == TokenType.EXCEPT: 4670 operation = exp.Except 4671 else: 4672 operation = exp.Intersect 4673 4674 comments = self._prev.comments 4675 4676 if self._match(TokenType.DISTINCT): 4677 distinct: t.Optional[bool] = True 4678 elif self._match(TokenType.ALL): 4679 distinct = False 4680 else: 4681 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4682 if distinct is None: 4683 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4684 4685 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4686 "STRICT", "CORRESPONDING" 4687 ) 4688 if self._match_text_seq("CORRESPONDING"): 4689 by_name = True 4690 if not side and not kind: 4691 kind = "INNER" 4692 4693 on_column_list = None 4694 if by_name and self._match_texts(("ON", "BY")): 4695 on_column_list = self._parse_wrapped_csv(self._parse_column) 4696 4697 expression = self._parse_select(nested=True, parse_set_operation=False) 4698 4699 return self.expression( 4700 operation, 4701 comments=comments, 4702 this=this, 4703 distinct=distinct, 4704 by_name=by_name, 4705 expression=expression, 4706 side=side, 4707 kind=kind, 4708 on=on_column_list, 4709 ) 4710 4711 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4712 while True: 4713 setop = self.parse_set_operation(this) 4714 if not setop: 4715 break 4716 this = setop 4717 4718 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4719 expression = this.expression 4720 4721 if expression: 4722 for arg in self.SET_OP_MODIFIERS: 4723 expr = expression.args.get(arg) 4724 if expr: 4725 this.set(arg, expr.pop()) 4726 4727 return this 4728 4729 def _parse_expression(self) -> t.Optional[exp.Expression]: 4730 return self._parse_alias(self._parse_assignment()) 4731 4732 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4733 this = self._parse_disjunction() 4734 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4735 # This allows us to parse <non-identifier token> := <expr> 4736 this = exp.column( 4737 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4738 ) 4739 4740 while self._match_set(self.ASSIGNMENT): 4741 if isinstance(this, exp.Column) and len(this.parts) == 1: 4742 this = this.this 4743 4744 this = self.expression( 4745 
self.ASSIGNMENT[self._prev.token_type], 4746 this=this, 4747 comments=self._prev_comments, 4748 expression=self._parse_assignment(), 4749 ) 4750 4751 return this 4752 4753 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4754 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4755 4756 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4757 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4758 4759 def _parse_equality(self) -> t.Optional[exp.Expression]: 4760 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4761 4762 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4763 return self._parse_tokens(self._parse_range, self.COMPARISON) 4764 4765 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4766 this = this or self._parse_bitwise() 4767 negate = self._match(TokenType.NOT) 4768 4769 if self._match_set(self.RANGE_PARSERS): 4770 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4771 if not expression: 4772 return this 4773 4774 this = expression 4775 elif self._match(TokenType.ISNULL): 4776 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4777 4778 # Postgres supports ISNULL and NOTNULL for conditions. 4779 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4780 if self._match(TokenType.NOTNULL): 4781 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4782 this = self.expression(exp.Not, this=this) 4783 4784 if negate: 4785 this = self._negate_range(this) 4786 4787 if self._match(TokenType.IS): 4788 this = self._parse_is(this) 4789 4790 return this 4791 4792 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4793 if not this: 4794 return this 4795 4796 return self.expression(exp.Not, this=this) 4797 4798 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4799 index = self._index - 1 4800 negate = self._match(TokenType.NOT) 4801 4802 if self._match_text_seq("DISTINCT", "FROM"): 4803 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4804 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4805 4806 if self._match(TokenType.JSON): 4807 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4808 4809 if self._match_text_seq("WITH"): 4810 _with = True 4811 elif self._match_text_seq("WITHOUT"): 4812 _with = False 4813 else: 4814 _with = None 4815 4816 unique = self._match(TokenType.UNIQUE) 4817 self._match_text_seq("KEYS") 4818 expression: t.Optional[exp.Expression] = self.expression( 4819 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4820 ) 4821 else: 4822 expression = self._parse_primary() or self._parse_null() 4823 if not expression: 4824 self._retreat(index) 4825 return None 4826 4827 this = self.expression(exp.Is, this=this, expression=expression) 4828 return self.expression(exp.Not, this=this) if negate else this 4829 4830 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4831 unnest = self._parse_unnest(with_alias=False) 4832 if unnest: 4833 this = self.expression(exp.In, this=this, unnest=unnest) 4834 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4835 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4836 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4837 4838 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4839 this = self.expression(exp.In, 
this=this, query=expressions[0].subquery(copy=False)) 4840 else: 4841 this = self.expression(exp.In, this=this, expressions=expressions) 4842 4843 if matched_l_paren: 4844 self._match_r_paren(this) 4845 elif not self._match(TokenType.R_BRACKET, expression=this): 4846 self.raise_error("Expecting ]") 4847 else: 4848 this = self.expression(exp.In, this=this, field=self._parse_column()) 4849 4850 return this 4851 4852 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4853 low = self._parse_bitwise() 4854 self._match(TokenType.AND) 4855 high = self._parse_bitwise() 4856 return self.expression(exp.Between, this=this, low=low, high=high) 4857 4858 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4859 if not self._match(TokenType.ESCAPE): 4860 return this 4861 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4862 4863 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4864 index = self._index 4865 4866 if not self._match(TokenType.INTERVAL) and match_interval: 4867 return None 4868 4869 if self._match(TokenType.STRING, advance=False): 4870 this = self._parse_primary() 4871 else: 4872 this = self._parse_term() 4873 4874 if not this or ( 4875 isinstance(this, exp.Column) 4876 and not this.table 4877 and not this.this.quoted 4878 and this.name.upper() == "IS" 4879 ): 4880 self._retreat(index) 4881 return None 4882 4883 unit = self._parse_function() or ( 4884 not self._match(TokenType.ALIAS, advance=False) 4885 and self._parse_var(any_token=True, upper=True) 4886 ) 4887 4888 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4889 # each INTERVAL expression into this canonical form so it's easy to transpile 4890 if this and this.is_number: 4891 this = exp.Literal.string(this.to_py()) 4892 elif this and this.is_string: 4893 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4894 if parts and unit: 4895 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4896 unit = None 4897 self._retreat(self._index - 1) 4898 4899 if len(parts) == 1: 4900 this = exp.Literal.string(parts[0][0]) 4901 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4902 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4903 unit = self.expression( 4904 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4905 ) 4906 4907 interval = self.expression(exp.Interval, this=this, unit=unit) 4908 4909 index = self._index 4910 self._match(TokenType.PLUS) 4911 4912 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4913 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4914 return self.expression( 4915 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4916 ) 4917 4918 self._retreat(index) 4919 return interval 4920 4921 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4922 this = self._parse_term() 4923 4924 while True: 4925 if self._match_set(self.BITWISE): 4926 this = self.expression( 4927 self.BITWISE[self._prev.token_type], 4928 this=this, 4929 expression=self._parse_term(), 4930 ) 4931 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4932 this = self.expression( 4933 exp.DPipe, 4934 this=this, 4935 expression=self._parse_term(), 4936 safe=not self.dialect.STRICT_STRING_CONCAT, 4937 ) 4938 elif self._match(TokenType.DQMARK): 4939 this = self.expression( 4940 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4941 ) 4942 elif self._match_pair(TokenType.LT, TokenType.LT): 4943 this = self.expression( 4944 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4945 ) 4946 elif self._match_pair(TokenType.GT, TokenType.GT): 4947 this = self.expression( 4948 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4949 ) 4950 else: 4951 break 4952 4953 return this 4954 4955 def _parse_term(self) -> t.Optional[exp.Expression]: 4956 this = self._parse_factor() 4957 4958 while self._match_set(self.TERM): 4959 klass = self.TERM[self._prev.token_type] 4960 comments = self._prev_comments 4961 expression = self._parse_factor() 4962 4963 this = self.expression(klass, this=this, comments=comments, expression=expression) 4964 4965 if isinstance(this, exp.Collate): 4966 expr = this.expression 4967 4968 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4969 # fallback to Identifier / Var 4970 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4971 ident = expr.this 4972 if isinstance(ident, exp.Identifier): 4973 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4974 4975 return this 4976 4977 def _parse_factor(self) -> t.Optional[exp.Expression]: 4978 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4979 this = parse_method() 4980 4981 while self._match_set(self.FACTOR): 4982 klass = self.FACTOR[self._prev.token_type] 4983 comments = self._prev_comments 4984 expression = parse_method() 4985 4986 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4987 self._retreat(self._index - 1) 4988 return this 4989 4990 this = self.expression(klass, this=this, comments=comments, expression=expression) 4991 4992 if isinstance(this, exp.Div): 4993 this.args["typed"] = self.dialect.TYPED_DIVISION 4994 this.args["safe"] = self.dialect.SAFE_DIVISION 4995 4996 return this 4997 4998 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4999 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5000 5001 def _parse_unary(self) -> t.Optional[exp.Expression]: 5002 if self._match_set(self.UNARY_PARSERS): 5003 return self.UNARY_PARSERS[self._prev.token_type](self) 5004 return self._parse_at_time_zone(self._parse_type()) 5005 5006 def _parse_type( 5007 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5008 ) -> t.Optional[exp.Expression]: 5009 interval = parse_interval and self._parse_interval() 5010 if interval: 5011 return interval 5012 5013 index = self._index 5014 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5015 
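# Illustrative example (editor's addition, not part of the upstream sqlglot source): for a
# parameterized type, the call above returns an exp.DataType whose "expressions" hold the
# type parameters, so something like
#   sqlglot.parse_one("CAST(x AS DECIMAL(38, 0))").find(sqlglot.exp.DataType)
# should yield a DataType with two exp.DataTypeParam children (precision and scale), while a
# bare type such as INT comes back without any "expressions" set.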
5016 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5017 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5018 if isinstance(data_type, exp.Cast): 5019 # This constructor can contain ops directly after it, for instance struct unnesting: 5020 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 5021 return self._parse_column_ops(data_type) 5022 5023 if data_type: 5024 index2 = self._index 5025 this = self._parse_primary() 5026 5027 if isinstance(this, exp.Literal): 5028 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5029 if parser: 5030 return parser(self, this, data_type) 5031 5032 return self.expression(exp.Cast, this=this, to=data_type) 5033 5034 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5035 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5036 # 5037 # If the index difference here is greater than 1, that means the parser itself must have 5038 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5039 # 5040 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5041 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5042 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5043 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 5044 # 5045 # In these cases, we don't really want to return the converted type, but instead retreat 5046 # and try to parse a Column or Identifier in the section below. 5047 if data_type.expressions and index2 - index > 1: 5048 self._retreat(index2) 5049 return self._parse_column_ops(data_type) 5050 5051 self._retreat(index) 5052 5053 if fallback_to_identifier: 5054 return self._parse_id_var() 5055 5056 this = self._parse_column() 5057 return this and self._parse_column_ops(this) 5058 5059 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5060 this = self._parse_type() 5061 if not this: 5062 return None 5063 5064 if isinstance(this, exp.Column) and not this.table: 5065 this = exp.var(this.name.upper()) 5066 5067 return self.expression( 5068 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5069 ) 5070 5071 def _parse_types( 5072 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5073 ) -> t.Optional[exp.Expression]: 5074 index = self._index 5075 5076 this: t.Optional[exp.Expression] = None 5077 prefix = self._match_text_seq("SYSUDTLIB", ".") 5078 5079 if not self._match_set(self.TYPE_TOKENS): 5080 identifier = allow_identifiers and self._parse_id_var( 5081 any_token=False, tokens=(TokenType.VAR,) 5082 ) 5083 if isinstance(identifier, exp.Identifier): 5084 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5085 5086 if len(tokens) != 1: 5087 self.raise_error("Unexpected identifier", self._prev) 5088 5089 if tokens[0].token_type in self.TYPE_TOKENS: 5090 self._prev = tokens[0] 5091 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5092 type_name = identifier.name 5093 5094 while self._match(TokenType.DOT): 5095 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5096 5097 this = exp.DataType.build(type_name, udt=True) 5098 else: 5099 self._retreat(self._index - 1) 5100 return None 5101 else: 5102 return None 5103 5104 type_token = self._prev.token_type 5105 5106 if type_token == TokenType.PSEUDO_TYPE:
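# Editor's note (descriptive comment, not in the upstream source): pseudo-type keywords are not
# resolved to a concrete exp.DataType.Type; the matched keyword is kept verbatim (upper-cased)
# inside exp.PseudoType so it can be re-emitted as written.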
5107 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5108 5109 if type_token == TokenType.OBJECT_IDENTIFIER: 5110 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5111 5112 # https://materialize.com/docs/sql/types/map/ 5113 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5114 key_type = self._parse_types( 5115 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5116 ) 5117 if not self._match(TokenType.FARROW): 5118 self._retreat(index) 5119 return None 5120 5121 value_type = self._parse_types( 5122 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5123 ) 5124 if not self._match(TokenType.R_BRACKET): 5125 self._retreat(index) 5126 return None 5127 5128 return exp.DataType( 5129 this=exp.DataType.Type.MAP, 5130 expressions=[key_type, value_type], 5131 nested=True, 5132 prefix=prefix, 5133 ) 5134 5135 nested = type_token in self.NESTED_TYPE_TOKENS 5136 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5137 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5138 expressions = None 5139 maybe_func = False 5140 5141 if self._match(TokenType.L_PAREN): 5142 if is_struct: 5143 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5144 elif nested: 5145 expressions = self._parse_csv( 5146 lambda: self._parse_types( 5147 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5148 ) 5149 ) 5150 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5151 this = expressions[0] 5152 this.set("nullable", True) 5153 self._match_r_paren() 5154 return this 5155 elif type_token in self.ENUM_TYPE_TOKENS: 5156 expressions = self._parse_csv(self._parse_equality) 5157 elif is_aggregate: 5158 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5159 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5160 ) 5161 if not func_or_ident: 5162 return None 5163 expressions = [func_or_ident] 5164 if self._match(TokenType.COMMA): 5165 expressions.extend( 5166 self._parse_csv( 5167 lambda: self._parse_types( 5168 check_func=check_func, 5169 schema=schema, 5170 allow_identifiers=allow_identifiers, 5171 ) 5172 ) 5173 ) 5174 else: 5175 expressions = self._parse_csv(self._parse_type_size) 5176 5177 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5178 if type_token == TokenType.VECTOR and len(expressions) == 2: 5179 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5180 5181 if not expressions or not self._match(TokenType.R_PAREN): 5182 self._retreat(index) 5183 return None 5184 5185 maybe_func = True 5186 5187 values: t.Optional[t.List[exp.Expression]] = None 5188 5189 if nested and self._match(TokenType.LT): 5190 if is_struct: 5191 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5192 else: 5193 expressions = self._parse_csv( 5194 lambda: self._parse_types( 5195 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5196 ) 5197 ) 5198 5199 if not self._match(TokenType.GT): 5200 self.raise_error("Expecting >") 5201 5202 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5203 values = self._parse_csv(self._parse_assignment) 5204 if not values and is_struct: 5205 values = None 5206 self._retreat(self._index - 1) 5207 else: 5208 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5209 5210 if type_token in self.TIMESTAMPS: 5211 if self._match_text_seq("WITH", "TIME", "ZONE"): 5212 maybe_func = False 5213 tz_type = ( 
5214 exp.DataType.Type.TIMETZ 5215 if type_token in self.TIMES 5216 else exp.DataType.Type.TIMESTAMPTZ 5217 ) 5218 this = exp.DataType(this=tz_type, expressions=expressions) 5219 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5220 maybe_func = False 5221 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5222 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5223 maybe_func = False 5224 elif type_token == TokenType.INTERVAL: 5225 unit = self._parse_var(upper=True) 5226 if unit: 5227 if self._match_text_seq("TO"): 5228 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5229 5230 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5231 else: 5232 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5233 5234 if maybe_func and check_func: 5235 index2 = self._index 5236 peek = self._parse_string() 5237 5238 if not peek: 5239 self._retreat(index) 5240 return None 5241 5242 self._retreat(index2) 5243 5244 if not this: 5245 if self._match_text_seq("UNSIGNED"): 5246 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5247 if not unsigned_type_token: 5248 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5249 5250 type_token = unsigned_type_token or type_token 5251 5252 this = exp.DataType( 5253 this=exp.DataType.Type[type_token.value], 5254 expressions=expressions, 5255 nested=nested, 5256 prefix=prefix, 5257 ) 5258 5259 # Empty arrays/structs are allowed 5260 if values is not None: 5261 cls = exp.Struct if is_struct else exp.Array 5262 this = exp.cast(cls(expressions=values), this, copy=False) 5263 5264 elif expressions: 5265 this.set("expressions", expressions) 5266 5267 # https://materialize.com/docs/sql/types/list/#type-name 5268 while self._match(TokenType.LIST): 5269 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5270 5271 index = self._index 5272 5273 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5274 matched_array = self._match(TokenType.ARRAY) 5275 5276 while self._curr: 5277 datatype_token = self._prev.token_type 5278 matched_l_bracket = self._match(TokenType.L_BRACKET) 5279 5280 if (not matched_l_bracket and not matched_array) or ( 5281 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5282 ): 5283 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5284 # not to be confused with the fixed size array parsing 5285 break 5286 5287 matched_array = False 5288 values = self._parse_csv(self._parse_assignment) or None 5289 if ( 5290 values 5291 and not schema 5292 and ( 5293 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5294 ) 5295 ): 5296 # Retreating here means that we should not parse the following values as part of the data type, e.g. 
in DuckDB 5297 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5298 self._retreat(index) 5299 break 5300 5301 this = exp.DataType( 5302 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5303 ) 5304 self._match(TokenType.R_BRACKET) 5305 5306 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5307 converter = self.TYPE_CONVERTERS.get(this.this) 5308 if converter: 5309 this = converter(t.cast(exp.DataType, this)) 5310 5311 return this 5312 5313 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5314 index = self._index 5315 5316 if ( 5317 self._curr 5318 and self._next 5319 and self._curr.token_type in self.TYPE_TOKENS 5320 and self._next.token_type in self.TYPE_TOKENS 5321 ): 5322 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5323 # type token. Without this, the list will be parsed as a type and we'll eventually crash 5324 this = self._parse_id_var() 5325 else: 5326 this = ( 5327 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5328 or self._parse_id_var() 5329 ) 5330 5331 self._match(TokenType.COLON) 5332 5333 if ( 5334 type_required 5335 and not isinstance(this, exp.DataType) 5336 and not self._match_set(self.TYPE_TOKENS, advance=False) 5337 ): 5338 self._retreat(index) 5339 return self._parse_types() 5340 5341 return self._parse_column_def(this) 5342 5343 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5344 if not self._match_text_seq("AT", "TIME", "ZONE"): 5345 return this 5346 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5347 5348 def _parse_column(self) -> t.Optional[exp.Expression]: 5349 this = self._parse_column_reference() 5350 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5351 5352 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5353 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5354 5355 return column 5356 5357 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5358 this = self._parse_field() 5359 if ( 5360 not this 5361 and self._match(TokenType.VALUES, advance=False) 5362 and self.VALUES_FOLLOWED_BY_PAREN 5363 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5364 ): 5365 this = self._parse_id_var() 5366 5367 if isinstance(this, exp.Identifier): 5368 # We bubble up comments from the Identifier to the Column 5369 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5370 5371 return this 5372 5373 def _parse_colon_as_variant_extract( 5374 self, this: t.Optional[exp.Expression] 5375 ) -> t.Optional[exp.Expression]: 5376 casts = [] 5377 json_path = [] 5378 escape = None 5379 5380 while self._match(TokenType.COLON): 5381 start_index = self._index 5382 5383 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5384 path = self._parse_column_ops( 5385 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5386 ) 5387 5388 # The cast :: operator has a lower precedence than the extraction operator :, so 5389 # we rearrange the AST appropriately to avoid casting the JSON path 5390 while isinstance(path, exp.Cast): 5391 casts.append(path.to) 5392 path = path.this 5393 5394 if casts: 5395 dcolon_offset = next( 5396 i 5397 for i, t in enumerate(self._tokens[start_index:]) 5398 if t.token_type == TokenType.DCOLON 
5399 ) 5400 end_token = self._tokens[start_index + dcolon_offset - 1] 5401 else: 5402 end_token = self._prev 5403 5404 if path: 5405 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5406 # it'll roundtrip to a string literal in GET_PATH 5407 if isinstance(path, exp.Identifier) and path.quoted: 5408 escape = True 5409 5410 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5411 5412 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5413 # Databricks transforms it back to the colon/dot notation 5414 if json_path: 5415 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5416 5417 if json_path_expr: 5418 json_path_expr.set("escape", escape) 5419 5420 this = self.expression( 5421 exp.JSONExtract, 5422 this=this, 5423 expression=json_path_expr, 5424 variant_extract=True, 5425 ) 5426 5427 while casts: 5428 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5429 5430 return this 5431 5432 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5433 return self._parse_types() 5434 5435 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5436 this = self._parse_bracket(this) 5437 5438 while self._match_set(self.COLUMN_OPERATORS): 5439 op_token = self._prev.token_type 5440 op = self.COLUMN_OPERATORS.get(op_token) 5441 5442 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5443 field = self._parse_dcolon() 5444 if not field: 5445 self.raise_error("Expected type") 5446 elif op and self._curr: 5447 field = self._parse_column_reference() or self._parse_bracket() 5448 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5449 field = self._parse_column_ops(field) 5450 else: 5451 field = self._parse_field(any_token=True, anonymous_func=True) 5452 5453 if isinstance(field, (exp.Func, exp.Window)) and this: 5454 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) 
etc 5455 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5456 this = exp.replace_tree( 5457 this, 5458 lambda n: ( 5459 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5460 if n.table 5461 else n.this 5462 ) 5463 if isinstance(n, exp.Column) 5464 else n, 5465 ) 5466 5467 if op: 5468 this = op(self, this, field) 5469 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5470 this = self.expression( 5471 exp.Column, 5472 comments=this.comments, 5473 this=field, 5474 table=this.this, 5475 db=this.args.get("table"), 5476 catalog=this.args.get("db"), 5477 ) 5478 elif isinstance(field, exp.Window): 5479 # Move the exp.Dot's to the window's function 5480 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5481 field.set("this", window_func) 5482 this = field 5483 else: 5484 this = self.expression(exp.Dot, this=this, expression=field) 5485 5486 if field and field.comments: 5487 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5488 5489 this = self._parse_bracket(this) 5490 5491 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5492 5493 def _parse_primary(self) -> t.Optional[exp.Expression]: 5494 if self._match_set(self.PRIMARY_PARSERS): 5495 token_type = self._prev.token_type 5496 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5497 5498 if token_type == TokenType.STRING: 5499 expressions = [primary] 5500 while self._match(TokenType.STRING): 5501 expressions.append(exp.Literal.string(self._prev.text)) 5502 5503 if len(expressions) > 1: 5504 return self.expression(exp.Concat, expressions=expressions) 5505 5506 return primary 5507 5508 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5509 return exp.Literal.number(f"0.{self._prev.text}") 5510 5511 if self._match(TokenType.L_PAREN): 5512 comments = self._prev_comments 5513 query = self._parse_select() 5514 5515 if query: 5516 expressions = [query] 5517 else: 5518 expressions = self._parse_expressions() 5519 5520 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5521 5522 if not this and self._match(TokenType.R_PAREN, advance=False): 5523 this = self.expression(exp.Tuple) 5524 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5525 this = self._parse_subquery(this=this, parse_alias=False) 5526 elif isinstance(this, exp.Subquery): 5527 this = self._parse_subquery( 5528 this=self._parse_set_operations(this), parse_alias=False 5529 ) 5530 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5531 this = self.expression(exp.Tuple, expressions=expressions) 5532 else: 5533 this = self.expression(exp.Paren, this=this) 5534 5535 if this: 5536 this.add_comments(comments) 5537 5538 self._match_r_paren(expression=this) 5539 return this 5540 5541 return None 5542 5543 def _parse_field( 5544 self, 5545 any_token: bool = False, 5546 tokens: t.Optional[t.Collection[TokenType]] = None, 5547 anonymous_func: bool = False, 5548 ) -> t.Optional[exp.Expression]: 5549 if anonymous_func: 5550 field = ( 5551 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5552 or self._parse_primary() 5553 ) 5554 else: 5555 field = self._parse_primary() or self._parse_function( 5556 anonymous=anonymous_func, any_token=any_token 5557 ) 5558 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5559 5560 def _parse_function( 5561 self, 5562 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5563 anonymous: bool = False, 5564 optional_parens: 
bool = True, 5565 any_token: bool = False, 5566 ) -> t.Optional[exp.Expression]: 5567 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5568 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5569 fn_syntax = False 5570 if ( 5571 self._match(TokenType.L_BRACE, advance=False) 5572 and self._next 5573 and self._next.text.upper() == "FN" 5574 ): 5575 self._advance(2) 5576 fn_syntax = True 5577 5578 func = self._parse_function_call( 5579 functions=functions, 5580 anonymous=anonymous, 5581 optional_parens=optional_parens, 5582 any_token=any_token, 5583 ) 5584 5585 if fn_syntax: 5586 self._match(TokenType.R_BRACE) 5587 5588 return func 5589 5590 def _parse_function_call( 5591 self, 5592 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5593 anonymous: bool = False, 5594 optional_parens: bool = True, 5595 any_token: bool = False, 5596 ) -> t.Optional[exp.Expression]: 5597 if not self._curr: 5598 return None 5599 5600 comments = self._curr.comments 5601 token_type = self._curr.token_type 5602 this = self._curr.text 5603 upper = this.upper() 5604 5605 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5606 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5607 self._advance() 5608 return self._parse_window(parser(self)) 5609 5610 if not self._next or self._next.token_type != TokenType.L_PAREN: 5611 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5612 self._advance() 5613 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5614 5615 return None 5616 5617 if any_token: 5618 if token_type in self.RESERVED_TOKENS: 5619 return None 5620 elif token_type not in self.FUNC_TOKENS: 5621 return None 5622 5623 self._advance(2) 5624 5625 parser = self.FUNCTION_PARSERS.get(upper) 5626 if parser and not anonymous: 5627 this = parser(self) 5628 else: 5629 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5630 5631 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5632 this = self.expression( 5633 subquery_predicate, comments=comments, this=self._parse_select() 5634 ) 5635 self._match_r_paren() 5636 return this 5637 5638 if functions is None: 5639 functions = self.FUNCTIONS 5640 5641 function = functions.get(upper) 5642 known_function = function and not anonymous 5643 5644 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5645 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5646 5647 post_func_comments = self._curr and self._curr.comments 5648 if known_function and post_func_comments: 5649 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5650 # call we'll construct it as exp.Anonymous, even if it's "known" 5651 if any( 5652 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5653 for comment in post_func_comments 5654 ): 5655 known_function = False 5656 5657 if alias and known_function: 5658 args = self._kv_to_prop_eq(args) 5659 5660 if known_function: 5661 func_builder = t.cast(t.Callable, function) 5662 5663 if "dialect" in func_builder.__code__.co_varnames: 5664 func = func_builder(args, dialect=self.dialect) 5665 else: 5666 func = func_builder(args) 5667 5668 func = self.validate_expression(func, args) 5669 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5670 func.meta["name"] = this 5671 5672 this = func 5673 else: 5674 if token_type == TokenType.IDENTIFIER: 5675 this = exp.Identifier(this=this, quoted=True) 5676 this = self.expression(exp.Anonymous, this=this, expressions=args) 5677 
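# Illustrative example (editor's addition, not from the upstream source; MY_UDF is a made-up name):
# a call whose name has a registered builder (e.g. COALESCE) was constructed above via that
# builder, whereas an unrecognized name falls back to exp.Anonymous, so something like
#   sqlglot.parse_one("SELECT MY_UDF(1, 2)").find(sqlglot.exp.Anonymous)
# is expected to return Anonymous(this="MY_UDF", expressions=[...]); either way, comments are
# re-attached and any trailing OVER (...) clause is handled by _parse_window() below.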
5678 if isinstance(this, exp.Expression): 5679 this.add_comments(comments) 5680 5681 self._match_r_paren(this) 5682 return self._parse_window(this) 5683 5684 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5685 return expression 5686 5687 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5688 transformed = [] 5689 5690 for index, e in enumerate(expressions): 5691 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5692 if isinstance(e, exp.Alias): 5693 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5694 5695 if not isinstance(e, exp.PropertyEQ): 5696 e = self.expression( 5697 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5698 ) 5699 5700 if isinstance(e.this, exp.Column): 5701 e.this.replace(e.this.this) 5702 else: 5703 e = self._to_prop_eq(e, index) 5704 5705 transformed.append(e) 5706 5707 return transformed 5708 5709 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5710 return self._parse_statement() 5711 5712 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5713 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5714 5715 def _parse_user_defined_function( 5716 self, kind: t.Optional[TokenType] = None 5717 ) -> t.Optional[exp.Expression]: 5718 this = self._parse_table_parts(schema=True) 5719 5720 if not self._match(TokenType.L_PAREN): 5721 return this 5722 5723 expressions = self._parse_csv(self._parse_function_parameter) 5724 self._match_r_paren() 5725 return self.expression( 5726 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5727 ) 5728 5729 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5730 literal = self._parse_primary() 5731 if literal: 5732 return self.expression(exp.Introducer, this=token.text, expression=literal) 5733 5734 return self.expression(exp.Identifier, this=token.text) 5735 5736 def _parse_session_parameter(self) -> exp.SessionParameter: 5737 kind = None 5738 this = self._parse_id_var() or self._parse_primary() 5739 5740 if this and self._match(TokenType.DOT): 5741 kind = this.name 5742 this = self._parse_var() or self._parse_primary() 5743 5744 return self.expression(exp.SessionParameter, this=this, kind=kind) 5745 5746 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5747 return self._parse_id_var() 5748 5749 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5750 index = self._index 5751 5752 if self._match(TokenType.L_PAREN): 5753 expressions = t.cast( 5754 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5755 ) 5756 5757 if not self._match(TokenType.R_PAREN): 5758 self._retreat(index) 5759 else: 5760 expressions = [self._parse_lambda_arg()] 5761 5762 if self._match_set(self.LAMBDAS): 5763 return self.LAMBDAS[self._prev.token_type](self, expressions) 5764 5765 self._retreat(index) 5766 5767 this: t.Optional[exp.Expression] 5768 5769 if self._match(TokenType.DISTINCT): 5770 this = self.expression( 5771 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5772 ) 5773 else: 5774 this = self._parse_select_or_expression(alias=alias) 5775 5776 return self._parse_limit( 5777 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5778 ) 5779 5780 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5781 index = self._index 5782 if not 
self._match(TokenType.L_PAREN): 5783 return this 5784 5785 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5786 # expr can be of both types 5787 if self._match_set(self.SELECT_START_TOKENS): 5788 self._retreat(index) 5789 return this 5790 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5791 self._match_r_paren() 5792 return self.expression(exp.Schema, this=this, expressions=args) 5793 5794 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5795 return self._parse_column_def(self._parse_field(any_token=True)) 5796 5797 def _parse_column_def( 5798 self, this: t.Optional[exp.Expression], computed_column: bool = True 5799 ) -> t.Optional[exp.Expression]: 5800 # column defs are not really columns, they're identifiers 5801 if isinstance(this, exp.Column): 5802 this = this.this 5803 5804 if not computed_column: 5805 self._match(TokenType.ALIAS) 5806 5807 kind = self._parse_types(schema=True) 5808 5809 if self._match_text_seq("FOR", "ORDINALITY"): 5810 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5811 5812 constraints: t.List[exp.Expression] = [] 5813 5814 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5815 ("ALIAS", "MATERIALIZED") 5816 ): 5817 persisted = self._prev.text.upper() == "MATERIALIZED" 5818 constraint_kind = exp.ComputedColumnConstraint( 5819 this=self._parse_assignment(), 5820 persisted=persisted or self._match_text_seq("PERSISTED"), 5821 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5822 ) 5823 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5824 elif ( 5825 kind 5826 and self._match(TokenType.ALIAS, advance=False) 5827 and ( 5828 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5829 or (self._next and self._next.token_type == TokenType.L_PAREN) 5830 ) 5831 ): 5832 self._advance() 5833 constraints.append( 5834 self.expression( 5835 exp.ColumnConstraint, 5836 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5837 ) 5838 ) 5839 5840 while True: 5841 constraint = self._parse_column_constraint() 5842 if not constraint: 5843 break 5844 constraints.append(constraint) 5845 5846 if not kind and not constraints: 5847 return this 5848 5849 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5850 5851 def _parse_auto_increment( 5852 self, 5853 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5854 start = None 5855 increment = None 5856 5857 if self._match(TokenType.L_PAREN, advance=False): 5858 args = self._parse_wrapped_csv(self._parse_bitwise) 5859 start = seq_get(args, 0) 5860 increment = seq_get(args, 1) 5861 elif self._match_text_seq("START"): 5862 start = self._parse_bitwise() 5863 self._match_text_seq("INCREMENT") 5864 increment = self._parse_bitwise() 5865 5866 if start and increment: 5867 return exp.GeneratedAsIdentityColumnConstraint( 5868 start=start, increment=increment, this=False 5869 ) 5870 5871 return exp.AutoIncrementColumnConstraint() 5872 5873 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5874 if not self._match_text_seq("REFRESH"): 5875 self._retreat(self._index - 1) 5876 return None 5877 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5878 5879 def _parse_compress(self) -> exp.CompressColumnConstraint: 5880 if self._match(TokenType.L_PAREN, advance=False): 5881 return self.expression( 5882 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5883 
) 5884 5885 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5886 5887 def _parse_generated_as_identity( 5888 self, 5889 ) -> ( 5890 exp.GeneratedAsIdentityColumnConstraint 5891 | exp.ComputedColumnConstraint 5892 | exp.GeneratedAsRowColumnConstraint 5893 ): 5894 if self._match_text_seq("BY", "DEFAULT"): 5895 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5896 this = self.expression( 5897 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5898 ) 5899 else: 5900 self._match_text_seq("ALWAYS") 5901 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5902 5903 self._match(TokenType.ALIAS) 5904 5905 if self._match_text_seq("ROW"): 5906 start = self._match_text_seq("START") 5907 if not start: 5908 self._match(TokenType.END) 5909 hidden = self._match_text_seq("HIDDEN") 5910 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5911 5912 identity = self._match_text_seq("IDENTITY") 5913 5914 if self._match(TokenType.L_PAREN): 5915 if self._match(TokenType.START_WITH): 5916 this.set("start", self._parse_bitwise()) 5917 if self._match_text_seq("INCREMENT", "BY"): 5918 this.set("increment", self._parse_bitwise()) 5919 if self._match_text_seq("MINVALUE"): 5920 this.set("minvalue", self._parse_bitwise()) 5921 if self._match_text_seq("MAXVALUE"): 5922 this.set("maxvalue", self._parse_bitwise()) 5923 5924 if self._match_text_seq("CYCLE"): 5925 this.set("cycle", True) 5926 elif self._match_text_seq("NO", "CYCLE"): 5927 this.set("cycle", False) 5928 5929 if not identity: 5930 this.set("expression", self._parse_range()) 5931 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5932 args = self._parse_csv(self._parse_bitwise) 5933 this.set("start", seq_get(args, 0)) 5934 this.set("increment", seq_get(args, 1)) 5935 5936 self._match_r_paren() 5937 5938 return this 5939 5940 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5941 self._match_text_seq("LENGTH") 5942 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5943 5944 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5945 if self._match_text_seq("NULL"): 5946 return self.expression(exp.NotNullColumnConstraint) 5947 if self._match_text_seq("CASESPECIFIC"): 5948 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5949 if self._match_text_seq("FOR", "REPLICATION"): 5950 return self.expression(exp.NotForReplicationColumnConstraint) 5951 5952 # Unconsume the `NOT` token 5953 self._retreat(self._index - 1) 5954 return None 5955 5956 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5957 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5958 5959 procedure_option_follows = ( 5960 self._match(TokenType.WITH, advance=False) 5961 and self._next 5962 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5963 ) 5964 5965 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5966 return self.expression( 5967 exp.ColumnConstraint, 5968 this=this, 5969 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5970 ) 5971 5972 return this 5973 5974 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5975 if not self._match(TokenType.CONSTRAINT): 5976 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5977 5978 return self.expression( 5979 exp.Constraint, 5980 this=self._parse_id_var(), 5981 expressions=self._parse_unnamed_constraints(), 5982 ) 5983 5984 def 
_parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5985 constraints = [] 5986 while True: 5987 constraint = self._parse_unnamed_constraint() or self._parse_function() 5988 if not constraint: 5989 break 5990 constraints.append(constraint) 5991 5992 return constraints 5993 5994 def _parse_unnamed_constraint( 5995 self, constraints: t.Optional[t.Collection[str]] = None 5996 ) -> t.Optional[exp.Expression]: 5997 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5998 constraints or self.CONSTRAINT_PARSERS 5999 ): 6000 return None 6001 6002 constraint = self._prev.text.upper() 6003 if constraint not in self.CONSTRAINT_PARSERS: 6004 self.raise_error(f"No parser found for schema constraint {constraint}.") 6005 6006 return self.CONSTRAINT_PARSERS[constraint](self) 6007 6008 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6009 return self._parse_id_var(any_token=False) 6010 6011 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6012 self._match_text_seq("KEY") 6013 return self.expression( 6014 exp.UniqueColumnConstraint, 6015 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6016 this=self._parse_schema(self._parse_unique_key()), 6017 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6018 on_conflict=self._parse_on_conflict(), 6019 options=self._parse_key_constraint_options(), 6020 ) 6021 6022 def _parse_key_constraint_options(self) -> t.List[str]: 6023 options = [] 6024 while True: 6025 if not self._curr: 6026 break 6027 6028 if self._match(TokenType.ON): 6029 action = None 6030 on = self._advance_any() and self._prev.text 6031 6032 if self._match_text_seq("NO", "ACTION"): 6033 action = "NO ACTION" 6034 elif self._match_text_seq("CASCADE"): 6035 action = "CASCADE" 6036 elif self._match_text_seq("RESTRICT"): 6037 action = "RESTRICT" 6038 elif self._match_pair(TokenType.SET, TokenType.NULL): 6039 action = "SET NULL" 6040 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6041 action = "SET DEFAULT" 6042 else: 6043 self.raise_error("Invalid key constraint") 6044 6045 options.append(f"ON {on} {action}") 6046 else: 6047 var = self._parse_var_from_options( 6048 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6049 ) 6050 if not var: 6051 break 6052 options.append(var.name) 6053 6054 return options 6055 6056 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6057 if match and not self._match(TokenType.REFERENCES): 6058 return None 6059 6060 expressions = None 6061 this = self._parse_table(schema=True) 6062 options = self._parse_key_constraint_options() 6063 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6064 6065 def _parse_foreign_key(self) -> exp.ForeignKey: 6066 expressions = self._parse_wrapped_id_vars() 6067 reference = self._parse_references() 6068 on_options = {} 6069 6070 while self._match(TokenType.ON): 6071 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6072 self.raise_error("Expected DELETE or UPDATE") 6073 6074 kind = self._prev.text.lower() 6075 6076 if self._match_text_seq("NO", "ACTION"): 6077 action = "NO ACTION" 6078 elif self._match(TokenType.SET): 6079 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6080 action = "SET " + self._prev.text.upper() 6081 else: 6082 self._advance() 6083 action = self._prev.text.upper() 6084 6085 on_options[kind] = action 6086 6087 return self.expression( 6088 exp.ForeignKey, 6089 expressions=expressions, 6090 reference=reference, 6091 
            options=self._parse_key_constraint_options(),
            **on_options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_ordered() or self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.PrimaryKeyColumnConstraint,
                desc=desc,
                options=self._parse_key_constraint_options(),
            )

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime literal in ODBC format. The literal is parsed into the corresponding
        expression type, e.g. `{d'yyyy-mm-dd'}` is parsed into a `Date` node, exactly as
        `DATE('yyyy-mm-dd')` would be.
6139 6140 Reference: 6141 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6142 """ 6143 self._match(TokenType.VAR) 6144 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6145 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6146 if not self._match(TokenType.R_BRACE): 6147 self.raise_error("Expected }") 6148 return expression 6149 6150 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6151 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6152 return this 6153 6154 bracket_kind = self._prev.token_type 6155 if ( 6156 bracket_kind == TokenType.L_BRACE 6157 and self._curr 6158 and self._curr.token_type == TokenType.VAR 6159 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6160 ): 6161 return self._parse_odbc_datetime_literal() 6162 6163 expressions = self._parse_csv( 6164 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6165 ) 6166 6167 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6168 self.raise_error("Expected ]") 6169 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6170 self.raise_error("Expected }") 6171 6172 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6173 if bracket_kind == TokenType.L_BRACE: 6174 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6175 elif not this: 6176 this = build_array_constructor( 6177 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6178 ) 6179 else: 6180 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6181 if constructor_type: 6182 return build_array_constructor( 6183 constructor_type, 6184 args=expressions, 6185 bracket_kind=bracket_kind, 6186 dialect=self.dialect, 6187 ) 6188 6189 expressions = apply_index_offset( 6190 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6191 ) 6192 this = self.expression(exp.Bracket, this=this, expressions=expressions) 6193 6194 self._add_comments(this) 6195 return self._parse_bracket(this) 6196 6197 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6198 if self._match(TokenType.COLON): 6199 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6200 return this 6201 6202 def _parse_case(self) -> t.Optional[exp.Expression]: 6203 ifs = [] 6204 default = None 6205 6206 comments = self._prev_comments 6207 expression = self._parse_assignment() 6208 6209 while self._match(TokenType.WHEN): 6210 this = self._parse_assignment() 6211 self._match(TokenType.THEN) 6212 then = self._parse_assignment() 6213 ifs.append(self.expression(exp.If, this=this, true=then)) 6214 6215 if self._match(TokenType.ELSE): 6216 default = self._parse_assignment() 6217 6218 if not self._match(TokenType.END): 6219 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6220 default = exp.column("interval") 6221 else: 6222 self.raise_error("Expected END after CASE", self._prev) 6223 6224 return self.expression( 6225 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6226 ) 6227 6228 def _parse_if(self) -> t.Optional[exp.Expression]: 6229 if self._match(TokenType.L_PAREN): 6230 args = self._parse_csv( 6231 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6232 ) 6233 this = self.validate_expression(exp.If.from_arg_list(args), args) 6234 self._match_r_paren() 6235 
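            # Editorial note (not part of the library): the branch above handles the function
            # form, e.g. IF(cond, true_value, false_value), while the branch below handles the
            # keyword form IF cond THEN ... [ELSE ...] END used by some dialects. A hedged
            # sketch of the function form through the public API:
            #
            #     import sqlglot
            #     sqlglot.parse_one("SELECT IF(x > 0, 'pos', 'non-pos') FROM t")
            #
            # which produces an exp.If node inside the surrounding select.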
else: 6236 index = self._index - 1 6237 6238 if self.NO_PAREN_IF_COMMANDS and index == 0: 6239 return self._parse_as_command(self._prev) 6240 6241 condition = self._parse_assignment() 6242 6243 if not condition: 6244 self._retreat(index) 6245 return None 6246 6247 self._match(TokenType.THEN) 6248 true = self._parse_assignment() 6249 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6250 self._match(TokenType.END) 6251 this = self.expression(exp.If, this=condition, true=true, false=false) 6252 6253 return this 6254 6255 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6256 if not self._match_text_seq("VALUE", "FOR"): 6257 self._retreat(self._index - 1) 6258 return None 6259 6260 return self.expression( 6261 exp.NextValueFor, 6262 this=self._parse_column(), 6263 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6264 ) 6265 6266 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6267 this = self._parse_function() or self._parse_var_or_string(upper=True) 6268 6269 if self._match(TokenType.FROM): 6270 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6271 6272 if not self._match(TokenType.COMMA): 6273 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6274 6275 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6276 6277 def _parse_gap_fill(self) -> exp.GapFill: 6278 self._match(TokenType.TABLE) 6279 this = self._parse_table() 6280 6281 self._match(TokenType.COMMA) 6282 args = [this, *self._parse_csv(self._parse_lambda)] 6283 6284 gap_fill = exp.GapFill.from_arg_list(args) 6285 return self.validate_expression(gap_fill, args) 6286 6287 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6288 this = self._parse_assignment() 6289 6290 if not self._match(TokenType.ALIAS): 6291 if self._match(TokenType.COMMA): 6292 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6293 6294 self.raise_error("Expected AS after CAST") 6295 6296 fmt = None 6297 to = self._parse_types() 6298 6299 default = self._match(TokenType.DEFAULT) 6300 if default: 6301 default = self._parse_bitwise() 6302 self._match_text_seq("ON", "CONVERSION", "ERROR") 6303 6304 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6305 fmt_string = self._parse_string() 6306 fmt = self._parse_at_time_zone(fmt_string) 6307 6308 if not to: 6309 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6310 if to.this in exp.DataType.TEMPORAL_TYPES: 6311 this = self.expression( 6312 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6313 this=this, 6314 format=exp.Literal.string( 6315 format_time( 6316 fmt_string.this if fmt_string else "", 6317 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6318 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6319 ) 6320 ), 6321 safe=safe, 6322 ) 6323 6324 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6325 this.set("zone", fmt.args["zone"]) 6326 return this 6327 elif not to: 6328 self.raise_error("Expected TYPE after CAST") 6329 elif isinstance(to, exp.Identifier): 6330 to = exp.DataType.build(to.name, udt=True) 6331 elif to.this == exp.DataType.Type.CHAR: 6332 if self._match(TokenType.CHARACTER_SET): 6333 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6334 6335 return self.expression( 6336 exp.Cast if strict else exp.TryCast, 6337 this=this, 6338 to=to, 6339 format=fmt, 6340 safe=safe, 6341 
action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6342 default=default, 6343 ) 6344 6345 def _parse_string_agg(self) -> exp.GroupConcat: 6346 if self._match(TokenType.DISTINCT): 6347 args: t.List[t.Optional[exp.Expression]] = [ 6348 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6349 ] 6350 if self._match(TokenType.COMMA): 6351 args.extend(self._parse_csv(self._parse_assignment)) 6352 else: 6353 args = self._parse_csv(self._parse_assignment) # type: ignore 6354 6355 if self._match_text_seq("ON", "OVERFLOW"): 6356 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6357 if self._match_text_seq("ERROR"): 6358 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6359 else: 6360 self._match_text_seq("TRUNCATE") 6361 on_overflow = self.expression( 6362 exp.OverflowTruncateBehavior, 6363 this=self._parse_string(), 6364 with_count=( 6365 self._match_text_seq("WITH", "COUNT") 6366 or not self._match_text_seq("WITHOUT", "COUNT") 6367 ), 6368 ) 6369 else: 6370 on_overflow = None 6371 6372 index = self._index 6373 if not self._match(TokenType.R_PAREN) and args: 6374 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6375 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6376 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6377 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6378 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6379 6380 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6381 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6382 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
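        # Editorial illustration (not from the library docs): because STRING_AGG / LISTAGG are
        # canonicalized into exp.GroupConcat here, a hedged transpilation sketch such as
        #
        #     import sqlglot
        #     sqlglot.transpile("SELECT STRING_AGG(x, ',') FROM t", read="postgres", write="mysql")
        #
        # is expected to emit a GROUP_CONCAT-based query for MySQL (exact SQL may vary by version).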
6383 if not self._match_text_seq("WITHIN", "GROUP"): 6384 self._retreat(index) 6385 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6386 6387 # The corresponding match_r_paren will be called in parse_function (caller) 6388 self._match_l_paren() 6389 6390 return self.expression( 6391 exp.GroupConcat, 6392 this=self._parse_order(this=seq_get(args, 0)), 6393 separator=seq_get(args, 1), 6394 on_overflow=on_overflow, 6395 ) 6396 6397 def _parse_convert( 6398 self, strict: bool, safe: t.Optional[bool] = None 6399 ) -> t.Optional[exp.Expression]: 6400 this = self._parse_bitwise() 6401 6402 if self._match(TokenType.USING): 6403 to: t.Optional[exp.Expression] = self.expression( 6404 exp.CharacterSet, this=self._parse_var() 6405 ) 6406 elif self._match(TokenType.COMMA): 6407 to = self._parse_types() 6408 else: 6409 to = None 6410 6411 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6412 6413 def _parse_xml_table(self) -> exp.XMLTable: 6414 namespaces = None 6415 passing = None 6416 columns = None 6417 6418 if self._match_text_seq("XMLNAMESPACES", "("): 6419 namespaces = self._parse_xml_namespace() 6420 self._match_text_seq(")", ",") 6421 6422 this = self._parse_string() 6423 6424 if self._match_text_seq("PASSING"): 6425 # The BY VALUE keywords are optional and are provided for semantic clarity 6426 self._match_text_seq("BY", "VALUE") 6427 passing = self._parse_csv(self._parse_column) 6428 6429 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6430 6431 if self._match_text_seq("COLUMNS"): 6432 columns = self._parse_csv(self._parse_field_def) 6433 6434 return self.expression( 6435 exp.XMLTable, 6436 this=this, 6437 namespaces=namespaces, 6438 passing=passing, 6439 columns=columns, 6440 by_ref=by_ref, 6441 ) 6442 6443 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6444 namespaces = [] 6445 6446 while True: 6447 if self._match(TokenType.DEFAULT): 6448 uri = self._parse_string() 6449 else: 6450 uri = self._parse_alias(self._parse_string()) 6451 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6452 if not self._match(TokenType.COMMA): 6453 break 6454 6455 return namespaces 6456 6457 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6458 """ 6459 There are generally two variants of the DECODE function: 6460 6461 - DECODE(bin, charset) 6462 - DECODE(expression, search, result [, search, result] ... [, default]) 6463 6464 The second variant will always be parsed into a CASE expression. Note that NULL 6465 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6466 instead of relying on pattern matching. 
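        Editorial illustration (an assumption about typical behavior, not library documentation):
        a call such as

            DECODE(x, 1, 'one', 2, 'two', 'other')

        is expected to be parsed into roughly

            CASE WHEN x = 1 THEN 'one' WHEN x = 2 THEN 'two' ELSE 'other' END

        with NULL search values compared via IS NULL, as described above.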
6467 """ 6468 args = self._parse_csv(self._parse_assignment) 6469 6470 if len(args) < 3: 6471 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6472 6473 expression, *expressions = args 6474 if not expression: 6475 return None 6476 6477 ifs = [] 6478 for search, result in zip(expressions[::2], expressions[1::2]): 6479 if not search or not result: 6480 return None 6481 6482 if isinstance(search, exp.Literal): 6483 ifs.append( 6484 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6485 ) 6486 elif isinstance(search, exp.Null): 6487 ifs.append( 6488 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6489 ) 6490 else: 6491 cond = exp.or_( 6492 exp.EQ(this=expression.copy(), expression=search), 6493 exp.and_( 6494 exp.Is(this=expression.copy(), expression=exp.Null()), 6495 exp.Is(this=search.copy(), expression=exp.Null()), 6496 copy=False, 6497 ), 6498 copy=False, 6499 ) 6500 ifs.append(exp.If(this=cond, true=result)) 6501 6502 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6503 6504 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6505 self._match_text_seq("KEY") 6506 key = self._parse_column() 6507 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6508 self._match_text_seq("VALUE") 6509 value = self._parse_bitwise() 6510 6511 if not key and not value: 6512 return None 6513 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6514 6515 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6516 if not this or not self._match_text_seq("FORMAT", "JSON"): 6517 return this 6518 6519 return self.expression(exp.FormatJson, this=this) 6520 6521 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6522 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6523 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6524 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6525 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6526 else: 6527 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6528 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6529 6530 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6531 6532 if not empty and not error and not null: 6533 return None 6534 6535 return self.expression( 6536 exp.OnCondition, 6537 empty=empty, 6538 error=error, 6539 null=null, 6540 ) 6541 6542 def _parse_on_handling( 6543 self, on: str, *values: str 6544 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6545 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6546 for value in values: 6547 if self._match_text_seq(value, "ON", on): 6548 return f"{value} ON {on}" 6549 6550 index = self._index 6551 if self._match(TokenType.DEFAULT): 6552 default_value = self._parse_bitwise() 6553 if self._match_text_seq("ON", on): 6554 return default_value 6555 6556 self._retreat(index) 6557 6558 return None 6559 6560 @t.overload 6561 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6562 6563 @t.overload 6564 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6565 6566 def _parse_json_object(self, agg=False): 6567 star = self._parse_star() 6568 expressions = ( 6569 [star] 6570 if star 6571 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6572 ) 6573 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6574 6575 unique_keys = None 6576 if self._match_text_seq("WITH", "UNIQUE"): 6577 unique_keys = True 6578 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6579 unique_keys = False 6580 6581 self._match_text_seq("KEYS") 6582 6583 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6584 self._parse_type() 6585 ) 6586 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6587 6588 return self.expression( 6589 exp.JSONObjectAgg if agg else exp.JSONObject, 6590 expressions=expressions, 6591 null_handling=null_handling, 6592 unique_keys=unique_keys, 6593 return_type=return_type, 6594 encoding=encoding, 6595 ) 6596 6597 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6598 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6599 if not self._match_text_seq("NESTED"): 6600 this = self._parse_id_var() 6601 kind = self._parse_types(allow_identifiers=False) 6602 nested = None 6603 else: 6604 this = None 6605 kind = None 6606 nested = True 6607 6608 path = self._match_text_seq("PATH") and self._parse_string() 6609 nested_schema = nested and self._parse_json_schema() 6610 6611 return self.expression( 6612 exp.JSONColumnDef, 6613 this=this, 6614 kind=kind, 6615 path=path, 6616 nested_schema=nested_schema, 6617 ) 6618 6619 def _parse_json_schema(self) -> exp.JSONSchema: 6620 self._match_text_seq("COLUMNS") 6621 return self.expression( 6622 exp.JSONSchema, 6623 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6624 ) 6625 6626 def _parse_json_table(self) -> exp.JSONTable: 6627 this = self._parse_format_json(self._parse_bitwise()) 6628 path = self._match(TokenType.COMMA) and self._parse_string() 6629 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6630 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6631 schema = self._parse_json_schema() 6632 6633 return exp.JSONTable( 6634 this=this, 6635 schema=schema, 6636 path=path, 6637 error_handling=error_handling, 6638 empty_handling=empty_handling, 6639 ) 6640 6641 def _parse_match_against(self) -> exp.MatchAgainst: 6642 expressions = self._parse_csv(self._parse_column) 6643 6644 self._match_text_seq(")", "AGAINST", "(") 6645 6646 this = self._parse_string() 6647 6648 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6649 modifier = "IN NATURAL LANGUAGE MODE" 6650 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6651 modifier = f"{modifier} WITH QUERY EXPANSION" 6652 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6653 modifier = "IN BOOLEAN MODE" 6654 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6655 modifier = "WITH QUERY EXPANSION" 6656 else: 6657 modifier = None 6658 6659 return self.expression( 6660 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6661 ) 6662 6663 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6664 def _parse_open_json(self) -> exp.OpenJSON: 6665 this = self._parse_bitwise() 6666 path = self._match(TokenType.COMMA) and self._parse_string() 6667 6668 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6669 this = self._parse_field(any_token=True) 6670 kind = self._parse_types() 6671 path = 
self._parse_string() 6672 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6673 6674 return self.expression( 6675 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6676 ) 6677 6678 expressions = None 6679 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6680 self._match_l_paren() 6681 expressions = self._parse_csv(_parse_open_json_column_def) 6682 6683 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6684 6685 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6686 args = self._parse_csv(self._parse_bitwise) 6687 6688 if self._match(TokenType.IN): 6689 return self.expression( 6690 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6691 ) 6692 6693 if haystack_first: 6694 haystack = seq_get(args, 0) 6695 needle = seq_get(args, 1) 6696 else: 6697 haystack = seq_get(args, 1) 6698 needle = seq_get(args, 0) 6699 6700 return self.expression( 6701 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6702 ) 6703 6704 def _parse_predict(self) -> exp.Predict: 6705 self._match_text_seq("MODEL") 6706 this = self._parse_table() 6707 6708 self._match(TokenType.COMMA) 6709 self._match_text_seq("TABLE") 6710 6711 return self.expression( 6712 exp.Predict, 6713 this=this, 6714 expression=self._parse_table(), 6715 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6716 ) 6717 6718 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6719 args = self._parse_csv(self._parse_table) 6720 return exp.JoinHint(this=func_name.upper(), expressions=args) 6721 6722 def _parse_substring(self) -> exp.Substring: 6723 # Postgres supports the form: substring(string [from int] [for int]) 6724 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6725 6726 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6727 6728 if self._match(TokenType.FROM): 6729 args.append(self._parse_bitwise()) 6730 if self._match(TokenType.FOR): 6731 if len(args) == 1: 6732 args.append(exp.Literal.number(1)) 6733 args.append(self._parse_bitwise()) 6734 6735 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6736 6737 def _parse_trim(self) -> exp.Trim: 6738 # https://www.w3resource.com/sql/character-functions/trim.php 6739 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6740 6741 position = None 6742 collation = None 6743 expression = None 6744 6745 if self._match_texts(self.TRIM_TYPES): 6746 position = self._prev.text.upper() 6747 6748 this = self._parse_bitwise() 6749 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6750 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6751 expression = self._parse_bitwise() 6752 6753 if invert_order: 6754 this, expression = expression, this 6755 6756 if self._match(TokenType.COLLATE): 6757 collation = self._parse_bitwise() 6758 6759 return self.expression( 6760 exp.Trim, this=this, position=position, expression=expression, collation=collation 6761 ) 6762 6763 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6764 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6765 6766 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6767 return self._parse_window(self._parse_id_var(), alias=True) 6768 6769 def _parse_respect_or_ignore_nulls( 6770 self, this: t.Optional[exp.Expression] 6771 ) -> t.Optional[exp.Expression]: 6772 if self._match_text_seq("IGNORE", "NULLS"): 
6773 return self.expression(exp.IgnoreNulls, this=this) 6774 if self._match_text_seq("RESPECT", "NULLS"): 6775 return self.expression(exp.RespectNulls, this=this) 6776 return this 6777 6778 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6779 if self._match(TokenType.HAVING): 6780 self._match_texts(("MAX", "MIN")) 6781 max = self._prev.text.upper() != "MIN" 6782 return self.expression( 6783 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6784 ) 6785 6786 return this 6787 6788 def _parse_window( 6789 self, this: t.Optional[exp.Expression], alias: bool = False 6790 ) -> t.Optional[exp.Expression]: 6791 func = this 6792 comments = func.comments if isinstance(func, exp.Expression) else None 6793 6794 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6795 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6796 if self._match_text_seq("WITHIN", "GROUP"): 6797 order = self._parse_wrapped(self._parse_order) 6798 this = self.expression(exp.WithinGroup, this=this, expression=order) 6799 6800 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6801 self._match(TokenType.WHERE) 6802 this = self.expression( 6803 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6804 ) 6805 self._match_r_paren() 6806 6807 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6808 # Some dialects choose to implement and some do not. 6809 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6810 6811 # There is some code above in _parse_lambda that handles 6812 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6813 6814 # The below changes handle 6815 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6816 6817 # Oracle allows both formats 6818 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6819 # and Snowflake chose to do the same for familiarity 6820 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6821 if isinstance(this, exp.AggFunc): 6822 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6823 6824 if ignore_respect and ignore_respect is not this: 6825 ignore_respect.replace(ignore_respect.this) 6826 this = self.expression(ignore_respect.__class__, this=this) 6827 6828 this = self._parse_respect_or_ignore_nulls(this) 6829 6830 # bigquery select from window x AS (partition by ...) 
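        # Editorial illustration (not part of the library): the alias=True path below is reached
        # from _parse_named_window when a WINDOW clause is parsed, e.g. in
        #
        #     import sqlglot
        #     sqlglot.parse_one("SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y)")
        #
        # each `w AS (...)` definition flows through this method with alias=True (hedged sketch).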
6831 if alias: 6832 over = None 6833 self._match(TokenType.ALIAS) 6834 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6835 return this 6836 else: 6837 over = self._prev.text.upper() 6838 6839 if comments and isinstance(func, exp.Expression): 6840 func.pop_comments() 6841 6842 if not self._match(TokenType.L_PAREN): 6843 return self.expression( 6844 exp.Window, 6845 comments=comments, 6846 this=this, 6847 alias=self._parse_id_var(False), 6848 over=over, 6849 ) 6850 6851 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6852 6853 first = self._match(TokenType.FIRST) 6854 if self._match_text_seq("LAST"): 6855 first = False 6856 6857 partition, order = self._parse_partition_and_order() 6858 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6859 6860 if kind: 6861 self._match(TokenType.BETWEEN) 6862 start = self._parse_window_spec() 6863 self._match(TokenType.AND) 6864 end = self._parse_window_spec() 6865 6866 spec = self.expression( 6867 exp.WindowSpec, 6868 kind=kind, 6869 start=start["value"], 6870 start_side=start["side"], 6871 end=end["value"], 6872 end_side=end["side"], 6873 ) 6874 else: 6875 spec = None 6876 6877 self._match_r_paren() 6878 6879 window = self.expression( 6880 exp.Window, 6881 comments=comments, 6882 this=this, 6883 partition_by=partition, 6884 order=order, 6885 spec=spec, 6886 alias=window_alias, 6887 over=over, 6888 first=first, 6889 ) 6890 6891 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6892 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6893 return self._parse_window(window, alias=alias) 6894 6895 return window 6896 6897 def _parse_partition_and_order( 6898 self, 6899 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6900 return self._parse_partition_by(), self._parse_order() 6901 6902 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6903 self._match(TokenType.BETWEEN) 6904 6905 return { 6906 "value": ( 6907 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6908 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6909 or self._parse_bitwise() 6910 ), 6911 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6912 } 6913 6914 def _parse_alias( 6915 self, this: t.Optional[exp.Expression], explicit: bool = False 6916 ) -> t.Optional[exp.Expression]: 6917 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 6918 # so this section tries to parse the clause version and if it fails, it treats the token 6919 # as an identifier (alias) 6920 if self._can_parse_limit_or_offset(): 6921 return this 6922 6923 any_token = self._match(TokenType.ALIAS) 6924 comments = self._prev_comments or [] 6925 6926 if explicit and not any_token: 6927 return this 6928 6929 if self._match(TokenType.L_PAREN): 6930 aliases = self.expression( 6931 exp.Aliases, 6932 comments=comments, 6933 this=this, 6934 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6935 ) 6936 self._match_r_paren(aliases) 6937 return aliases 6938 6939 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6940 self.STRING_ALIASES and self._parse_string_as_identifier() 6941 ) 6942 6943 if alias: 6944 comments.extend(alias.pop_comments()) 6945 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6946 column = this.this 6947 6948 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6949 if not this.comments and column and 
column.comments: 6950 this.comments = column.pop_comments() 6951 6952 return this 6953 6954 def _parse_id_var( 6955 self, 6956 any_token: bool = True, 6957 tokens: t.Optional[t.Collection[TokenType]] = None, 6958 ) -> t.Optional[exp.Expression]: 6959 expression = self._parse_identifier() 6960 if not expression and ( 6961 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6962 ): 6963 quoted = self._prev.token_type == TokenType.STRING 6964 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6965 6966 return expression 6967 6968 def _parse_string(self) -> t.Optional[exp.Expression]: 6969 if self._match_set(self.STRING_PARSERS): 6970 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6971 return self._parse_placeholder() 6972 6973 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6974 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6975 6976 def _parse_number(self) -> t.Optional[exp.Expression]: 6977 if self._match_set(self.NUMERIC_PARSERS): 6978 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6979 return self._parse_placeholder() 6980 6981 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6982 if self._match(TokenType.IDENTIFIER): 6983 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6984 return self._parse_placeholder() 6985 6986 def _parse_var( 6987 self, 6988 any_token: bool = False, 6989 tokens: t.Optional[t.Collection[TokenType]] = None, 6990 upper: bool = False, 6991 ) -> t.Optional[exp.Expression]: 6992 if ( 6993 (any_token and self._advance_any()) 6994 or self._match(TokenType.VAR) 6995 or (self._match_set(tokens) if tokens else False) 6996 ): 6997 return self.expression( 6998 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6999 ) 7000 return self._parse_placeholder() 7001 7002 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7003 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7004 self._advance() 7005 return self._prev 7006 return None 7007 7008 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7009 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7010 7011 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7012 return self._parse_primary() or self._parse_var(any_token=True) 7013 7014 def _parse_null(self) -> t.Optional[exp.Expression]: 7015 if self._match_set(self.NULL_TOKENS): 7016 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7017 return self._parse_placeholder() 7018 7019 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7020 if self._match(TokenType.TRUE): 7021 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7022 if self._match(TokenType.FALSE): 7023 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7024 return self._parse_placeholder() 7025 7026 def _parse_star(self) -> t.Optional[exp.Expression]: 7027 if self._match(TokenType.STAR): 7028 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7029 return self._parse_placeholder() 7030 7031 def _parse_parameter(self) -> exp.Parameter: 7032 this = self._parse_identifier() or self._parse_primary_or_var() 7033 return self.expression(exp.Parameter, this=this) 7034 7035 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7036 if self._match_set(self.PLACEHOLDER_PARSERS): 7037 placeholder = 
self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7038 if placeholder: 7039 return placeholder 7040 self._advance(-1) 7041 return None 7042 7043 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7044 if not self._match_texts(keywords): 7045 return None 7046 if self._match(TokenType.L_PAREN, advance=False): 7047 return self._parse_wrapped_csv(self._parse_expression) 7048 7049 expression = self._parse_expression() 7050 return [expression] if expression else None 7051 7052 def _parse_csv( 7053 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7054 ) -> t.List[exp.Expression]: 7055 parse_result = parse_method() 7056 items = [parse_result] if parse_result is not None else [] 7057 7058 while self._match(sep): 7059 self._add_comments(parse_result) 7060 parse_result = parse_method() 7061 if parse_result is not None: 7062 items.append(parse_result) 7063 7064 return items 7065 7066 def _parse_tokens( 7067 self, parse_method: t.Callable, expressions: t.Dict 7068 ) -> t.Optional[exp.Expression]: 7069 this = parse_method() 7070 7071 while self._match_set(expressions): 7072 this = self.expression( 7073 expressions[self._prev.token_type], 7074 this=this, 7075 comments=self._prev_comments, 7076 expression=parse_method(), 7077 ) 7078 7079 return this 7080 7081 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7082 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7083 7084 def _parse_wrapped_csv( 7085 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7086 ) -> t.List[exp.Expression]: 7087 return self._parse_wrapped( 7088 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7089 ) 7090 7091 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7092 wrapped = self._match(TokenType.L_PAREN) 7093 if not wrapped and not optional: 7094 self.raise_error("Expecting (") 7095 parse_result = parse_method() 7096 if wrapped: 7097 self._match_r_paren() 7098 return parse_result 7099 7100 def _parse_expressions(self) -> t.List[exp.Expression]: 7101 return self._parse_csv(self._parse_expression) 7102 7103 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7104 return self._parse_select() or self._parse_set_operations( 7105 self._parse_alias(self._parse_assignment(), explicit=True) 7106 if alias 7107 else self._parse_assignment() 7108 ) 7109 7110 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7111 return self._parse_query_modifiers( 7112 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7113 ) 7114 7115 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7116 this = None 7117 if self._match_texts(self.TRANSACTION_KIND): 7118 this = self._prev.text 7119 7120 self._match_texts(("TRANSACTION", "WORK")) 7121 7122 modes = [] 7123 while True: 7124 mode = [] 7125 while self._match(TokenType.VAR): 7126 mode.append(self._prev.text) 7127 7128 if mode: 7129 modes.append(" ".join(mode)) 7130 if not self._match(TokenType.COMMA): 7131 break 7132 7133 return self.expression(exp.Transaction, this=this, modes=modes) 7134 7135 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7136 chain = None 7137 savepoint = None 7138 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7139 7140 self._match_texts(("TRANSACTION", "WORK")) 7141 7142 if self._match_text_seq("TO"): 7143 self._match_text_seq("SAVEPOINT") 7144 savepoint = self._parse_id_var() 
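            # Editorial illustration (assumption, not library documentation): this path captures
            # statements such as `ROLLBACK TO SAVEPOINT my_sp`, where `my_sp` becomes the
            # `savepoint` arg of the resulting exp.Rollback, e.g. (hedged sketch)
            #
            #     import sqlglot
            #     sqlglot.parse_one("ROLLBACK TO SAVEPOINT my_sp")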
7145 7146 if self._match(TokenType.AND): 7147 chain = not self._match_text_seq("NO") 7148 self._match_text_seq("CHAIN") 7149 7150 if is_rollback: 7151 return self.expression(exp.Rollback, savepoint=savepoint) 7152 7153 return self.expression(exp.Commit, chain=chain) 7154 7155 def _parse_refresh(self) -> exp.Refresh: 7156 self._match(TokenType.TABLE) 7157 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7158 7159 def _parse_add_column(self) -> t.Optional[exp.Expression]: 7160 if not self._match_text_seq("ADD"): 7161 return None 7162 7163 self._match(TokenType.COLUMN) 7164 exists_column = self._parse_exists(not_=True) 7165 expression = self._parse_field_def() 7166 7167 if expression: 7168 expression.set("exists", exists_column) 7169 7170 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7171 if self._match_texts(("FIRST", "AFTER")): 7172 position = self._prev.text 7173 column_position = self.expression( 7174 exp.ColumnPosition, this=self._parse_column(), position=position 7175 ) 7176 expression.set("position", column_position) 7177 7178 return expression 7179 7180 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7181 drop = self._match(TokenType.DROP) and self._parse_drop() 7182 if drop and not isinstance(drop, exp.Command): 7183 drop.set("kind", drop.args.get("kind", "COLUMN")) 7184 return drop 7185 7186 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7187 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7188 return self.expression( 7189 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7190 ) 7191 7192 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7193 index = self._index - 1 7194 7195 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7196 return self._parse_csv( 7197 lambda: self.expression( 7198 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7199 ) 7200 ) 7201 7202 self._retreat(index) 7203 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 7204 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 7205 7206 if self._match_text_seq("ADD", "COLUMNS"): 7207 schema = self._parse_schema() 7208 if schema: 7209 return [schema] 7210 return [] 7211 7212 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 7213 7214 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7215 if self._match_texts(self.ALTER_ALTER_PARSERS): 7216 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7217 7218 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7219 # keyword after ALTER we default to parsing this statement 7220 self._match(TokenType.COLUMN) 7221 column = self._parse_field(any_token=True) 7222 7223 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7224 return self.expression(exp.AlterColumn, this=column, drop=True) 7225 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7226 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7227 if self._match(TokenType.COMMENT): 7228 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7229 if self._match_text_seq("DROP", "NOT", "NULL"): 7230 return self.expression( 7231 exp.AlterColumn, 7232 this=column, 7233 drop=True, 7234 allow_null=True, 7235 ) 7236 if self._match_text_seq("SET", "NOT", "NULL"): 7237 return self.expression( 7238 
exp.AlterColumn, 7239 this=column, 7240 allow_null=False, 7241 ) 7242 7243 if self._match_text_seq("SET", "VISIBLE"): 7244 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7245 if self._match_text_seq("SET", "INVISIBLE"): 7246 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7247 7248 self._match_text_seq("SET", "DATA") 7249 self._match_text_seq("TYPE") 7250 return self.expression( 7251 exp.AlterColumn, 7252 this=column, 7253 dtype=self._parse_types(), 7254 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7255 using=self._match(TokenType.USING) and self._parse_assignment(), 7256 ) 7257 7258 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7259 if self._match_texts(("ALL", "EVEN", "AUTO")): 7260 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7261 7262 self._match_text_seq("KEY", "DISTKEY") 7263 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7264 7265 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7266 if compound: 7267 self._match_text_seq("SORTKEY") 7268 7269 if self._match(TokenType.L_PAREN, advance=False): 7270 return self.expression( 7271 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7272 ) 7273 7274 self._match_texts(("AUTO", "NONE")) 7275 return self.expression( 7276 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7277 ) 7278 7279 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7280 index = self._index - 1 7281 7282 partition_exists = self._parse_exists() 7283 if self._match(TokenType.PARTITION, advance=False): 7284 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7285 7286 self._retreat(index) 7287 return self._parse_csv(self._parse_drop_column) 7288 7289 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7290 if self._match(TokenType.COLUMN): 7291 exists = self._parse_exists() 7292 old_column = self._parse_column() 7293 to = self._match_text_seq("TO") 7294 new_column = self._parse_column() 7295 7296 if old_column is None or to is None or new_column is None: 7297 return None 7298 7299 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7300 7301 self._match_text_seq("TO") 7302 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7303 7304 def _parse_alter_table_set(self) -> exp.AlterSet: 7305 alter_set = self.expression(exp.AlterSet) 7306 7307 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7308 "TABLE", "PROPERTIES" 7309 ): 7310 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7311 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7312 alter_set.set("expressions", [self._parse_assignment()]) 7313 elif self._match_texts(("LOGGED", "UNLOGGED")): 7314 alter_set.set("option", exp.var(self._prev.text.upper())) 7315 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7316 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7317 elif self._match_text_seq("LOCATION"): 7318 alter_set.set("location", self._parse_field()) 7319 elif self._match_text_seq("ACCESS", "METHOD"): 7320 alter_set.set("access_method", self._parse_field()) 7321 elif self._match_text_seq("TABLESPACE"): 7322 alter_set.set("tablespace", self._parse_field()) 7323 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7324 
alter_set.set("file_format", [self._parse_field()]) 7325 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7326 alter_set.set("file_format", self._parse_wrapped_options()) 7327 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7328 alter_set.set("copy_options", self._parse_wrapped_options()) 7329 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7330 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7331 else: 7332 if self._match_text_seq("SERDE"): 7333 alter_set.set("serde", self._parse_field()) 7334 7335 alter_set.set("expressions", [self._parse_properties()]) 7336 7337 return alter_set 7338 7339 def _parse_alter(self) -> exp.Alter | exp.Command: 7340 start = self._prev 7341 7342 alter_token = self._match_set(self.ALTERABLES) and self._prev 7343 if not alter_token: 7344 return self._parse_as_command(start) 7345 7346 exists = self._parse_exists() 7347 only = self._match_text_seq("ONLY") 7348 this = self._parse_table(schema=True) 7349 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7350 7351 if self._next: 7352 self._advance() 7353 7354 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7355 if parser: 7356 actions = ensure_list(parser(self)) 7357 not_valid = self._match_text_seq("NOT", "VALID") 7358 options = self._parse_csv(self._parse_property) 7359 7360 if not self._curr and actions: 7361 return self.expression( 7362 exp.Alter, 7363 this=this, 7364 kind=alter_token.text.upper(), 7365 exists=exists, 7366 actions=actions, 7367 only=only, 7368 options=options, 7369 cluster=cluster, 7370 not_valid=not_valid, 7371 ) 7372 7373 return self._parse_as_command(start) 7374 7375 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7376 start = self._prev 7377 # https://duckdb.org/docs/sql/statements/analyze 7378 if not self._curr: 7379 return self.expression(exp.Analyze) 7380 7381 options = [] 7382 while self._match_texts(self.ANALYZE_STYLES): 7383 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7384 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7385 else: 7386 options.append(self._prev.text.upper()) 7387 7388 this: t.Optional[exp.Expression] = None 7389 inner_expression: t.Optional[exp.Expression] = None 7390 7391 kind = self._curr and self._curr.text.upper() 7392 7393 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7394 this = self._parse_table_parts() 7395 elif self._match_text_seq("TABLES"): 7396 if self._match_set((TokenType.FROM, TokenType.IN)): 7397 kind = f"{kind} {self._prev.text.upper()}" 7398 this = self._parse_table(schema=True, is_db_reference=True) 7399 elif self._match_text_seq("DATABASE"): 7400 this = self._parse_table(schema=True, is_db_reference=True) 7401 elif self._match_text_seq("CLUSTER"): 7402 this = self._parse_table() 7403 # Try matching inner expr keywords before fallback to parse table. 
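        # Editorial illustration (assumption, not library documentation): a bare statement such
        # as `ANALYZE my_table` matches none of the branches above and falls through to the
        # final else branch below, producing an exp.Analyze with no explicit kind, e.g.
        # (hedged sketch)
        #
        #     import sqlglot
        #     sqlglot.parse_one("ANALYZE my_table")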
7404 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7405 kind = None 7406 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7407 else: 7408 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7409 kind = None 7410 this = self._parse_table_parts() 7411 7412 partition = self._try_parse(self._parse_partition) 7413 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7414 return self._parse_as_command(start) 7415 7416 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7417 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7418 "WITH", "ASYNC", "MODE" 7419 ): 7420 mode = f"WITH {self._tokens[self._index-2].text.upper()} MODE" 7421 else: 7422 mode = None 7423 7424 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7425 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7426 7427 properties = self._parse_properties() 7428 return self.expression( 7429 exp.Analyze, 7430 kind=kind, 7431 this=this, 7432 mode=mode, 7433 partition=partition, 7434 properties=properties, 7435 expression=inner_expression, 7436 options=options, 7437 ) 7438 7439 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7440 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7441 this = None 7442 kind = self._prev.text.upper() 7443 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7444 expressions = [] 7445 7446 if not self._match_text_seq("STATISTICS"): 7447 self.raise_error("Expecting token STATISTICS") 7448 7449 if self._match_text_seq("NOSCAN"): 7450 this = "NOSCAN" 7451 elif self._match(TokenType.FOR): 7452 if self._match_text_seq("ALL", "COLUMNS"): 7453 this = "FOR ALL COLUMNS" 7454 if self._match_texts("COLUMNS"): 7455 this = "FOR COLUMNS" 7456 expressions = self._parse_csv(self._parse_column_reference) 7457 elif self._match_text_seq("SAMPLE"): 7458 sample = self._parse_number() 7459 expressions = [ 7460 self.expression( 7461 exp.AnalyzeSample, 7462 sample=sample, 7463 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7464 ) 7465 ] 7466 7467 return self.expression( 7468 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7469 ) 7470 7471 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7472 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7473 kind = None 7474 this = None 7475 expression: t.Optional[exp.Expression] = None 7476 if self._match_text_seq("REF", "UPDATE"): 7477 kind = "REF" 7478 this = "UPDATE" 7479 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7480 this = "UPDATE SET DANGLING TO NULL" 7481 elif self._match_text_seq("STRUCTURE"): 7482 kind = "STRUCTURE" 7483 if self._match_text_seq("CASCADE", "FAST"): 7484 this = "CASCADE FAST" 7485 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7486 ("ONLINE", "OFFLINE") 7487 ): 7488 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7489 expression = self._parse_into() 7490 7491 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7492 7493 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7494 this = self._prev.text.upper() 7495 if self._match_text_seq("COLUMNS"): 7496 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7497 return None 7498 7499 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7500 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7501 if self._match_text_seq("STATISTICS"): 7502 return self.expression(exp.AnalyzeDelete, kind=kind) 7503 return None 7504 7505 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7506 if self._match_text_seq("CHAINED", "ROWS"): 7507 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7508 return None 7509 7510 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7511 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7512 this = self._prev.text.upper() 7513 expression: t.Optional[exp.Expression] = None 7514 expressions = [] 7515 update_options = None 7516 7517 if self._match_text_seq("HISTOGRAM", "ON"): 7518 expressions = self._parse_csv(self._parse_column_reference) 7519 with_expressions = [] 7520 while self._match(TokenType.WITH): 7521 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7522 if self._match_texts(("SYNC", "ASYNC")): 7523 if self._match_text_seq("MODE", advance=False): 7524 with_expressions.append(f"{self._prev.text.upper()} MODE") 7525 self._advance() 7526 else: 7527 buckets = self._parse_number() 7528 if self._match_text_seq("BUCKETS"): 7529 with_expressions.append(f"{buckets} BUCKETS") 7530 if with_expressions: 7531 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7532 7533 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7534 TokenType.UPDATE, advance=False 7535 ): 7536 update_options = self._prev.text.upper() 7537 self._advance() 7538 elif self._match_text_seq("USING", "DATA"): 7539 expression = self.expression(exp.UsingData, this=self._parse_string()) 7540 7541 return self.expression( 7542 exp.AnalyzeHistogram, 7543 this=this, 7544 expressions=expressions, 7545 expression=expression, 7546 update_options=update_options, 7547 ) 7548 7549 def _parse_merge(self) -> exp.Merge: 7550 self._match(TokenType.INTO) 7551 target = self._parse_table() 7552 7553 if target and self._match(TokenType.ALIAS, advance=False): 7554 target.set("alias", self._parse_table_alias()) 7555 7556 self._match(TokenType.USING) 7557 using = self._parse_table() 7558 7559 self._match(TokenType.ON) 7560 on = self._parse_assignment() 7561 7562 return self.expression( 7563 exp.Merge, 7564 this=target, 7565 using=using, 7566 on=on, 7567 whens=self._parse_when_matched(), 7568 returning=self._parse_returning(), 7569 ) 7570 7571 def _parse_when_matched(self) -> exp.Whens: 7572 whens = [] 7573 7574 while self._match(TokenType.WHEN): 7575 matched = not self._match(TokenType.NOT) 7576 self._match_text_seq("MATCHED") 7577 source = ( 7578 False 7579 if self._match_text_seq("BY", "TARGET") 7580 else self._match_text_seq("BY", "SOURCE") 7581 ) 7582 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7583 7584 self._match(TokenType.THEN) 7585 7586 if self._match(TokenType.INSERT): 7587 this = self._parse_star() 7588 if this: 7589 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7590 else: 7591 then = self.expression( 7592 exp.Insert, 7593 this=exp.var("ROW") 7594 if self._match_text_seq("ROW") 7595 else self._parse_value(values=False), 7596 expression=self._match_text_seq("VALUES") and self._parse_value(), 7597 ) 7598 elif self._match(TokenType.UPDATE): 7599 expressions = self._parse_star() 7600 if expressions: 7601 then = self.expression(exp.Update, expressions=expressions) 7602 else: 7603 then = self.expression( 7604 exp.Update, 7605 
expressions=self._match(TokenType.SET) 7606 and self._parse_csv(self._parse_equality), 7607 ) 7608 elif self._match(TokenType.DELETE): 7609 then = self.expression(exp.Var, this=self._prev.text) 7610 else: 7611 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7612 7613 whens.append( 7614 self.expression( 7615 exp.When, 7616 matched=matched, 7617 source=source, 7618 condition=condition, 7619 then=then, 7620 ) 7621 ) 7622 return self.expression(exp.Whens, expressions=whens) 7623 7624 def _parse_show(self) -> t.Optional[exp.Expression]: 7625 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7626 if parser: 7627 return parser(self) 7628 return self._parse_as_command(self._prev) 7629 7630 def _parse_set_item_assignment( 7631 self, kind: t.Optional[str] = None 7632 ) -> t.Optional[exp.Expression]: 7633 index = self._index 7634 7635 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7636 return self._parse_set_transaction(global_=kind == "GLOBAL") 7637 7638 left = self._parse_primary() or self._parse_column() 7639 assignment_delimiter = self._match_texts(("=", "TO")) 7640 7641 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7642 self._retreat(index) 7643 return None 7644 7645 right = self._parse_statement() or self._parse_id_var() 7646 if isinstance(right, (exp.Column, exp.Identifier)): 7647 right = exp.var(right.name) 7648 7649 this = self.expression(exp.EQ, this=left, expression=right) 7650 return self.expression(exp.SetItem, this=this, kind=kind) 7651 7652 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7653 self._match_text_seq("TRANSACTION") 7654 characteristics = self._parse_csv( 7655 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7656 ) 7657 return self.expression( 7658 exp.SetItem, 7659 expressions=characteristics, 7660 kind="TRANSACTION", 7661 **{"global": global_}, # type: ignore 7662 ) 7663 7664 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7665 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7666 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7667 7668 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7669 index = self._index 7670 set_ = self.expression( 7671 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7672 ) 7673 7674 if self._curr: 7675 self._retreat(index) 7676 return self._parse_as_command(self._prev) 7677 7678 return set_ 7679 7680 def _parse_var_from_options( 7681 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7682 ) -> t.Optional[exp.Var]: 7683 start = self._curr 7684 if not start: 7685 return None 7686 7687 option = start.text.upper() 7688 continuations = options.get(option) 7689 7690 index = self._index 7691 self._advance() 7692 for keywords in continuations or []: 7693 if isinstance(keywords, str): 7694 keywords = (keywords,) 7695 7696 if self._match_text_seq(*keywords): 7697 option = f"{option} {' '.join(keywords)}" 7698 break 7699 else: 7700 if continuations or continuations is None: 7701 if raise_unmatched: 7702 self.raise_error(f"Unknown option {option}") 7703 7704 self._retreat(index) 7705 return None 7706 7707 return exp.var(option) 7708 7709 def _parse_as_command(self, start: Token) -> exp.Command: 7710 while self._curr: 7711 self._advance() 7712 text = self._find_sql(start, self._prev) 7713 size = len(start.text) 7714 self._warn_unsupported() 7715 return exp.Command(this=text[:size], 
expression=text[size:]) 7716 7717 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7718 settings = [] 7719 7720 self._match_l_paren() 7721 kind = self._parse_id_var() 7722 7723 if self._match(TokenType.L_PAREN): 7724 while True: 7725 key = self._parse_id_var() 7726 value = self._parse_primary() 7727 if not key and value is None: 7728 break 7729 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7730 self._match(TokenType.R_PAREN) 7731 7732 self._match_r_paren() 7733 7734 return self.expression( 7735 exp.DictProperty, 7736 this=this, 7737 kind=kind.this if kind else None, 7738 settings=settings, 7739 ) 7740 7741 def _parse_dict_range(self, this: str) -> exp.DictRange: 7742 self._match_l_paren() 7743 has_min = self._match_text_seq("MIN") 7744 if has_min: 7745 min = self._parse_var() or self._parse_primary() 7746 self._match_text_seq("MAX") 7747 max = self._parse_var() or self._parse_primary() 7748 else: 7749 max = self._parse_var() or self._parse_primary() 7750 min = exp.Literal.number(0) 7751 self._match_r_paren() 7752 return self.expression(exp.DictRange, this=this, min=min, max=max) 7753 7754 def _parse_comprehension( 7755 self, this: t.Optional[exp.Expression] 7756 ) -> t.Optional[exp.Comprehension]: 7757 index = self._index 7758 expression = self._parse_column() 7759 if not self._match(TokenType.IN): 7760 self._retreat(index - 1) 7761 return None 7762 iterator = self._parse_column() 7763 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7764 return self.expression( 7765 exp.Comprehension, 7766 this=this, 7767 expression=expression, 7768 iterator=iterator, 7769 condition=condition, 7770 ) 7771 7772 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7773 if self._match(TokenType.HEREDOC_STRING): 7774 return self.expression(exp.Heredoc, this=self._prev.text) 7775 7776 if not self._match_text_seq("$"): 7777 return None 7778 7779 tags = ["$"] 7780 tag_text = None 7781 7782 if self._is_connected(): 7783 self._advance() 7784 tags.append(self._prev.text.upper()) 7785 else: 7786 self.raise_error("No closing $ found") 7787 7788 if tags[-1] != "$": 7789 if self._is_connected() and self._match_text_seq("$"): 7790 tag_text = tags[-1] 7791 tags.append("$") 7792 else: 7793 self.raise_error("No closing $ found") 7794 7795 heredoc_start = self._curr 7796 7797 while self._curr: 7798 if self._match_text_seq(*tags, advance=False): 7799 this = self._find_sql(heredoc_start, self._prev) 7800 self._advance(len(tags)) 7801 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7802 7803 self._advance() 7804 7805 self.raise_error(f"No closing {''.join(tags)} found") 7806 return None 7807 7808 def _find_parser( 7809 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7810 ) -> t.Optional[t.Callable]: 7811 if not self._curr: 7812 return None 7813 7814 index = self._index 7815 this = [] 7816 while True: 7817 # The current token might be multiple words 7818 curr = self._curr.text.upper() 7819 key = curr.split(" ") 7820 this.append(curr) 7821 7822 self._advance() 7823 result, trie = in_trie(trie, key) 7824 if result == TrieResult.FAILED: 7825 break 7826 7827 if result == TrieResult.EXISTS: 7828 subparser = parsers[" ".join(this)] 7829 return subparser 7830 7831 self._retreat(index) 7832 return None 7833 7834 def _match(self, token_type, advance=True, expression=None): 7835 if not self._curr: 7836 return None 7837 7838 if self._curr.token_type == token_type: 7839 if advance: 7840 self._advance() 7841 self._add_comments(expression) 7842 return 
True 7843 7844 return None 7845 7846 def _match_set(self, types, advance=True): 7847 if not self._curr: 7848 return None 7849 7850 if self._curr.token_type in types: 7851 if advance: 7852 self._advance() 7853 return True 7854 7855 return None 7856 7857 def _match_pair(self, token_type_a, token_type_b, advance=True): 7858 if not self._curr or not self._next: 7859 return None 7860 7861 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7862 if advance: 7863 self._advance(2) 7864 return True 7865 7866 return None 7867 7868 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7869 if not self._match(TokenType.L_PAREN, expression=expression): 7870 self.raise_error("Expecting (") 7871 7872 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7873 if not self._match(TokenType.R_PAREN, expression=expression): 7874 self.raise_error("Expecting )") 7875 7876 def _match_texts(self, texts, advance=True): 7877 if ( 7878 self._curr 7879 and self._curr.token_type != TokenType.STRING 7880 and self._curr.text.upper() in texts 7881 ): 7882 if advance: 7883 self._advance() 7884 return True 7885 return None 7886 7887 def _match_text_seq(self, *texts, advance=True): 7888 index = self._index 7889 for text in texts: 7890 if ( 7891 self._curr 7892 and self._curr.token_type != TokenType.STRING 7893 and self._curr.text.upper() == text 7894 ): 7895 self._advance() 7896 else: 7897 self._retreat(index) 7898 return None 7899 7900 if not advance: 7901 self._retreat(index) 7902 7903 return True 7904 7905 def _replace_lambda( 7906 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7907 ) -> t.Optional[exp.Expression]: 7908 if not node: 7909 return node 7910 7911 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7912 7913 for column in node.find_all(exp.Column): 7914 typ = lambda_types.get(column.parts[0].name) 7915 if typ is not None: 7916 dot_or_id = column.to_dot() if column.table else column.this 7917 7918 if typ: 7919 dot_or_id = self.expression( 7920 exp.Cast, 7921 this=dot_or_id, 7922 to=typ, 7923 ) 7924 7925 parent = column.parent 7926 7927 while isinstance(parent, exp.Dot): 7928 if not isinstance(parent.parent, exp.Dot): 7929 parent.replace(dot_or_id) 7930 break 7931 parent = parent.parent 7932 else: 7933 if column is node: 7934 node = dot_or_id 7935 else: 7936 column.replace(dot_or_id) 7937 return node 7938 7939 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7940 start = self._prev 7941 7942 # Not to be confused with TRUNCATE(number, decimals) function call 7943 if self._match(TokenType.L_PAREN): 7944 self._retreat(self._index - 2) 7945 return self._parse_function() 7946 7947 # Clickhouse supports TRUNCATE DATABASE as well 7948 is_database = self._match(TokenType.DATABASE) 7949 7950 self._match(TokenType.TABLE) 7951 7952 exists = self._parse_exists(not_=False) 7953 7954 expressions = self._parse_csv( 7955 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7956 ) 7957 7958 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7959 7960 if self._match_text_seq("RESTART", "IDENTITY"): 7961 identity = "RESTART" 7962 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7963 identity = "CONTINUE" 7964 else: 7965 identity = None 7966 7967 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7968 option = self._prev.text 7969 else: 7970 option = None 7971 7972 partition = self._parse_partition() 
7973 7974 # Fallback case 7975 if self._curr: 7976 return self._parse_as_command(start) 7977 7978 return self.expression( 7979 exp.TruncateTable, 7980 expressions=expressions, 7981 is_database=is_database, 7982 exists=exists, 7983 cluster=cluster, 7984 identity=identity, 7985 option=option, 7986 partition=partition, 7987 ) 7988 7989 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7990 this = self._parse_ordered(self._parse_opclass) 7991 7992 if not self._match(TokenType.WITH): 7993 return this 7994 7995 op = self._parse_var(any_token=True) 7996 7997 return self.expression(exp.WithOperator, this=this, op=op) 7998 7999 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8000 self._match(TokenType.EQ) 8001 self._match(TokenType.L_PAREN) 8002 8003 opts: t.List[t.Optional[exp.Expression]] = [] 8004 option: exp.Expression | None 8005 while self._curr and not self._match(TokenType.R_PAREN): 8006 if self._match_text_seq("FORMAT_NAME", "="): 8007 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8008 option = self._parse_format_name() 8009 else: 8010 option = self._parse_property() 8011 8012 if option is None: 8013 self.raise_error("Unable to parse option") 8014 break 8015 8016 opts.append(option) 8017 8018 return opts 8019 8020 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8021 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8022 8023 options = [] 8024 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8025 option = self._parse_var(any_token=True) 8026 prev = self._prev.text.upper() 8027 8028 # Different dialects might separate options and values by white space, "=" and "AS" 8029 self._match(TokenType.EQ) 8030 self._match(TokenType.ALIAS) 8031 8032 param = self.expression(exp.CopyParameter, this=option) 8033 8034 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8035 TokenType.L_PAREN, advance=False 8036 ): 8037 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8038 param.set("expressions", self._parse_wrapped_options()) 8039 elif prev == "FILE_FORMAT": 8040 # T-SQL's external file format case 8041 param.set("expression", self._parse_field()) 8042 else: 8043 param.set("expression", self._parse_unquoted_field()) 8044 8045 options.append(param) 8046 self._match(sep) 8047 8048 return options 8049 8050 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8051 expr = self.expression(exp.Credentials) 8052 8053 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8054 expr.set("storage", self._parse_field()) 8055 if self._match_text_seq("CREDENTIALS"): 8056 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8057 creds = ( 8058 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8059 ) 8060 expr.set("credentials", creds) 8061 if self._match_text_seq("ENCRYPTION"): 8062 expr.set("encryption", self._parse_wrapped_options()) 8063 if self._match_text_seq("IAM_ROLE"): 8064 expr.set("iam_role", self._parse_field()) 8065 if self._match_text_seq("REGION"): 8066 expr.set("region", self._parse_field()) 8067 8068 return expr 8069 8070 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8071 return self._parse_field() 8072 8073 def _parse_copy(self) -> exp.Copy | exp.Command: 8074 start = self._prev 8075 8076 self._match(TokenType.INTO) 8077 8078 this = ( 8079 self._parse_select(nested=True, parse_subquery_alias=False) 8080 if self._match(TokenType.L_PAREN, advance=False) 8081 else self._parse_table(schema=True) 
8082 ) 8083 8084 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8085 8086 files = self._parse_csv(self._parse_file_location) 8087 credentials = self._parse_credentials() 8088 8089 self._match_text_seq("WITH") 8090 8091 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8092 8093 # Fallback case 8094 if self._curr: 8095 return self._parse_as_command(start) 8096 8097 return self.expression( 8098 exp.Copy, 8099 this=this, 8100 kind=kind, 8101 credentials=credentials, 8102 files=files, 8103 params=params, 8104 ) 8105 8106 def _parse_normalize(self) -> exp.Normalize: 8107 return self.expression( 8108 exp.Normalize, 8109 this=self._parse_bitwise(), 8110 form=self._match(TokenType.COMMA) and self._parse_var(), 8111 ) 8112 8113 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8114 args = self._parse_csv(lambda: self._parse_lambda()) 8115 8116 this = seq_get(args, 0) 8117 decimals = seq_get(args, 1) 8118 8119 return expr_type( 8120 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8121 ) 8122 8123 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8124 if self._match_text_seq("COLUMNS", "(", advance=False): 8125 this = self._parse_function() 8126 if isinstance(this, exp.Columns): 8127 this.set("unpack", True) 8128 return this 8129 8130 return self.expression( 8131 exp.Star, 8132 **{ # type: ignore 8133 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8134 "replace": self._parse_star_op("REPLACE"), 8135 "rename": self._parse_star_op("RENAME"), 8136 }, 8137 ) 8138 8139 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8140 privilege_parts = [] 8141 8142 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8143 # (end of privilege list) or L_PAREN (start of column list) are met 8144 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8145 privilege_parts.append(self._curr.text.upper()) 8146 self._advance() 8147 8148 this = exp.var(" ".join(privilege_parts)) 8149 expressions = ( 8150 self._parse_wrapped_csv(self._parse_column) 8151 if self._match(TokenType.L_PAREN, advance=False) 8152 else None 8153 ) 8154 8155 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8156 8157 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8158 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8159 principal = self._parse_id_var() 8160 8161 if not principal: 8162 return None 8163 8164 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8165 8166 def _parse_grant(self) -> exp.Grant | exp.Command: 8167 start = self._prev 8168 8169 privileges = self._parse_csv(self._parse_grant_privilege) 8170 8171 self._match(TokenType.ON) 8172 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8173 8174 # Attempt to parse the securable e.g. 
MySQL allows names 8175 # such as "foo.*", "*.*" which are not easily parseable yet 8176 securable = self._try_parse(self._parse_table_parts) 8177 8178 if not securable or not self._match_text_seq("TO"): 8179 return self._parse_as_command(start) 8180 8181 principals = self._parse_csv(self._parse_grant_principal) 8182 8183 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8184 8185 if self._curr: 8186 return self._parse_as_command(start) 8187 8188 return self.expression( 8189 exp.Grant, 8190 privileges=privileges, 8191 kind=kind, 8192 securable=securable, 8193 principals=principals, 8194 grant_option=grant_option, 8195 ) 8196 8197 def _parse_overlay(self) -> exp.Overlay: 8198 return self.expression( 8199 exp.Overlay, 8200 **{ # type: ignore 8201 "this": self._parse_bitwise(), 8202 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8203 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8204 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8205 }, 8206 ) 8207 8208 def _parse_format_name(self) -> exp.Property: 8209 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8210 # for FILE_FORMAT = <format_name> 8211 return self.expression( 8212 exp.Property, 8213 this=exp.var("FORMAT_NAME"), 8214 value=self._parse_string() or self._parse_table_parts(), 8215 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- dialect: The dialect to parse against, resolved through Dialect.get_or_raise. Default: None (the base dialect)
1498 def __init__( 1499 self, 1500 error_level: t.Optional[ErrorLevel] = None, 1501 error_message_context: int = 100, 1502 max_errors: int = 3, 1503 dialect: DialectType = None, 1504 ): 1505 from sqlglot.dialects import Dialect 1506 1507 self.error_level = error_level or ErrorLevel.IMMEDIATE 1508 self.error_message_context = error_message_context 1509 self.max_errors = max_errors 1510 self.dialect = Dialect.get_or_raise(dialect) 1511 self.reset()
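A minimal usage sketch of the constructor (the error settings and dialect below are illustrative; in practice most callers go through sqlglot.parse or sqlglot.parse_one, and each dialect exposes its own Parser subclass):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# Collect errors and raise them together at the end of parsing instead of
# raising on the first one (the default, ErrorLevel.IMMEDIATE).
parser = Parser(
    error_level=ErrorLevel.RAISE,
    error_message_context=50,  # characters of query context shown around an error
    max_errors=5,              # messages included in the raised ParseError
    dialect="duckdb",          # resolved via Dialect.get_or_raise
)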
1523 def parse( 1524 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1525 ) -> t.List[t.Optional[exp.Expression]]: 1526 """ 1527 Parses a list of tokens and returns a list of syntax trees, one tree 1528 per parsed SQL statement. 1529 1530 Args: 1531 raw_tokens: The list of tokens. 1532 sql: The original SQL string, used to produce helpful debug messages. 1533 1534 Returns: 1535 The list of the produced syntax trees. 1536 """ 1537 return self._parse( 1538 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1539 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
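A short sketch of the tokenize-then-parse round trip this method describes, using the base dialect and an illustrative two-statement input:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t; SELECT b FROM u"
tokens = Tokenizer().tokenize(sql)

# One syntax tree is produced per semicolon-separated statement.
for tree in Parser().parse(tokens, sql=sql):
    print(tree.sql())
# SELECT a FROM t
# SELECT b FROM u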
1541 def parse_into( 1542 self, 1543 expression_types: exp.IntoType, 1544 raw_tokens: t.List[Token], 1545 sql: t.Optional[str] = None, 1546 ) -> t.List[t.Optional[exp.Expression]]: 1547 """ 1548 Parses a list of tokens into a given Expression type. If a collection of Expression 1549 types is given instead, this method will try to parse the token list into each one 1550 of them, stopping at the first for which the parsing succeeds. 1551 1552 Args: 1553 expression_types: The expression type(s) to try and parse the token list into. 1554 raw_tokens: The list of tokens. 1555 sql: The original SQL string, used to produce helpful debug messages. 1556 1557 Returns: 1558 The target Expression. 1559 """ 1560 errors = [] 1561 for expression_type in ensure_list(expression_types): 1562 parser = self.EXPRESSION_PARSERS.get(expression_type) 1563 if not parser: 1564 raise TypeError(f"No parser registered for {expression_type}") 1565 1566 try: 1567 return self._parse(parser, raw_tokens, sql) 1568 except ParseError as e: 1569 e.errors[0]["into_expression"] = expression_type 1570 errors.append(e) 1571 1572 raise ParseError( 1573 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1574 errors=merge_errors(errors), 1575 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
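As a sketch, parsing a fragment into a specific expression type (exp.Condition has a registered entry in EXPRESSION_PARSERS; the input string is illustrative):

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "x > 1 AND y < 2"
tokens = Tokenizer().tokenize(sql)

# Parse the tokens as a boolean condition rather than as a full statement.
condition = Parser().parse_into(exp.Condition, tokens, sql=sql)[0]
print(condition.sql())  # x > 1 AND y < 2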
1615 def check_errors(self) -> None: 1616 """Logs or raises any found errors, depending on the chosen error level setting.""" 1617 if self.error_level == ErrorLevel.WARN: 1618 for error in self.errors: 1619 logger.error(str(error)) 1620 elif self.error_level == ErrorLevel.RAISE and self.errors: 1621 raise ParseError( 1622 concat_messages(self.errors, self.max_errors), 1623 errors=merge_errors(self.errors), 1624 )
Logs or raises any found errors, depending on the chosen error level setting.
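For illustration, a sketch of how the error level affects this step: with ErrorLevel.WARN, errors found while parsing a malformed input (the SQL below is an arbitrary example) are logged through the "sqlglot" logger and kept on parser.errors instead of being raised:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT (1"  # unbalanced parenthesis
parser = Parser(error_level=ErrorLevel.WARN)
parser.parse(Tokenizer().tokenize(sql), sql=sql)

# The recorded ParseError objects remain available for inspection.
print(len(parser.errors) > 0)  # True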
1626 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1627 """ 1628 Appends an error in the list of recorded errors or raises it, depending on the chosen 1629 error level setting. 1630 """ 1631 token = token or self._curr or self._prev or Token.string("") 1632 start = token.start 1633 end = token.end + 1 1634 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1635 highlight = self.sql[start:end] 1636 end_context = self.sql[end : end + self.error_message_context] 1637 1638 error = ParseError.new( 1639 f"{message}. Line {token.line}, Col: {token.col}.\n" 1640 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1641 description=message, 1642 line=token.line, 1643 col=token.col, 1644 start_context=start_context, 1645 highlight=highlight, 1646 end_context=end_context, 1647 ) 1648 1649 if self.error_level == ErrorLevel.IMMEDIATE: 1650 raise error 1651 1652 self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
1654 def expression( 1655 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1656 ) -> E: 1657 """ 1658 Creates a new, validated Expression. 1659 1660 Args: 1661 exp_class: The expression class to instantiate. 1662 comments: An optional list of comments to attach to the expression. 1663 kwargs: The arguments to set for the expression along with their respective values. 1664 1665 Returns: 1666 The target expression. 1667 """ 1668 instance = exp_class(**kwargs) 1669 instance.add_comments(comments) if comments else self._add_comments(instance) 1670 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
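A rough sketch of what this helper does, called outside of an actual parse purely for illustration (during parsing it also attaches any comments collected from the preceding tokens):

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()

# Builds the node and checks that all mandatory arguments are present via
# validate_expression; a missing argument would be reported according to
# the configured error level.
column = parser.expression(exp.Column, this=exp.to_identifier("x"))
print(column.sql())  # x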
1677 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1678 """ 1679 Validates an Expression, making sure that all its mandatory arguments are set. 1680 1681 Args: 1682 expression: The expression to validate. 1683 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1684 1685 Returns: 1686 The validated expression. 1687 """ 1688 if self.error_level != ErrorLevel.IGNORE: 1689 for error_message in expression.error_messages(args): 1690 self.raise_error(error_message) 1691 1692 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that were used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
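A small sketch of how the error level changes this behavior (exp.If is used only because it declares mandatory arguments):

from sqlglot import exp
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# With IGNORE, validation is skipped and the (incomplete) node is returned
# as-is; with any other level, each missing mandatory argument is reported
# through raise_error.
node = Parser(error_level=ErrorLevel.IGNORE).validate_expression(exp.If())
print(node.__class__.__name__)  # If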
4654 def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4655 start = self._index 4656 _, side_token, kind_token = self._parse_join_parts() 4657 4658 side = side_token.text if side_token else None 4659 kind = kind_token.text if kind_token else None 4660 4661 if not self._match_set(self.SET_OPERATIONS): 4662 self._retreat(start) 4663 return None 4664 4665 token_type = self._prev.token_type 4666 4667 if token_type == TokenType.UNION: 4668 operation: t.Type[exp.SetOperation] = exp.Union 4669 elif token_type == TokenType.EXCEPT: 4670 operation = exp.Except 4671 else: 4672 operation = exp.Intersect 4673 4674 comments = self._prev.comments 4675 4676 if self._match(TokenType.DISTINCT): 4677 distinct: t.Optional[bool] = True 4678 elif self._match(TokenType.ALL): 4679 distinct = False 4680 else: 4681 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4682 if distinct is None: 4683 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4684 4685 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4686 "STRICT", "CORRESPONDING" 4687 ) 4688 if self._match_text_seq("CORRESPONDING"): 4689 by_name = True 4690 if not side and not kind: 4691 kind = "INNER" 4692 4693 on_column_list = None 4694 if by_name and self._match_texts(("ON", "BY")): 4695 on_column_list = self._parse_wrapped_csv(self._parse_column) 4696 4697 expression = self._parse_select(nested=True, parse_set_operation=False) 4698 4699 return self.expression( 4700 operation, 4701 comments=comments, 4702 this=this, 4703 distinct=distinct, 4704 by_name=by_name, 4705 expression=expression, 4706 side=side, 4707 kind=kind, 4708 on=on_column_list, 4709 )
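For context, a brief sketch of how the DISTINCT/ALL handling above surfaces through the public API under the default dialect (where set operations default to DISTINCT):

import sqlglot
from sqlglot import exp

union = sqlglot.parse_one("SELECT 1 UNION SELECT 2")
assert isinstance(union, exp.Union)
print(union.args["distinct"])  # True, since no ALL was given

union_all = sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2")
print(union_all.args["distinct"])  # False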